This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 5adbaa65073511925e191d8d3da83d212f4f4e5f Author: Rakesh Sharma <[email protected]> AuthorDate: Wed Sep 20 12:28:25 2023 +0530 [7X]Fix utilities do not honor -d flag when COORDINATOR_DATA_DIRECTORY is not set. (#16433) * Fix: utilities do not honor -d flag when COORDINATOR_DATA_DIRECTORY is not set. Issue: Following are the gpMgmt utilities that do not honor the -d flag, when COORDINATOR_DATA_DIRECTORY is not set. 1. gpstart 2. gpstop 3. gpstatus 4. gprecoverseg 5. gpaddmirror RCA: to get the coordinator data directory gp.getcoordinator_datadir() function is called from the above-listed utilities. the function does not have any provision to return the coordinator data directory which is provided with the -d flag. currently, it looks for COORDINATOR_DATA_DIRECTORY and MASTER_DATA_DIRECTORY env variable. also in some of the utilities we were creating lock files before parsing the provided options which looks like the design flow that was causing the utilities to crash when looking for coordinator data directory. Fix: Added a global flag which holds the data directory provided with -d option. so when we run the utility and do parsing it sets the flag with the provided datadirectory and the same will be returned when we call gp.gp.getcoordinator_datadir(). Test: Added behave test cases to use the provided data directory if COORDINATOR_DATA_DIRECTORY is not set. Added behave test case to check if the provided coordinator data directory is preferred over the already set coordinator_data_dir env variable. it is tested by setting a wrong COORDINATOR_DATA_DIRECTORY env variable and when we run the utility with the correct data directory using the -d option then the utility should execute successfully. --- gpMgmt/bin/gppylib/commands/gp.py | 21 ++++++- gpMgmt/bin/gppylib/mainUtils.py | 35 ++++++----- gpMgmt/bin/gppylib/system/environment.py | 3 +- gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature | 69 ++++++++++++++++++++++ gpMgmt/test/behave/mgmt_utils/gpstart.feature | 26 ++++++++ gpMgmt/test/behave/mgmt_utils/gpstate.feature | 49 +++++++++++++++ gpMgmt/test/behave/mgmt_utils/gpstop.feature | 20 +++++++ gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py | 9 +++ 8 files changed, 213 insertions(+), 19 deletions(-) diff --git a/gpMgmt/bin/gppylib/commands/gp.py b/gpMgmt/bin/gppylib/commands/gp.py index 7a2b3a7bc1..904440b3a0 100644 --- a/gpMgmt/bin/gppylib/commands/gp.py +++ b/gpMgmt/bin/gppylib/commands/gp.py @@ -1215,16 +1215,31 @@ def get_gphome(): raise GpError('Environment Variable GPHOME not set') return gphome +''' +gprecoverseg, gpstart, gpstate, gpstop, gpaddmirror have -d option to give the coordinator data directory. +but its value was not used throughout the utilities. to fix this the best possible way is +to set and retrieve that set coordinator dir when we call get_coordinatordatadir(). +''' +option_coordinator_datadir = None +def set_coordinatordatadir(coordinator_datadir=None): + global option_coordinator_datadir + option_coordinator_datadir = coordinator_datadir ###### # Support both COORDINATOR_DATA_DIRECTORY and MASTER_DATA_DIRECTORY for backwards compatibility. # If both are set, the former is used and the latter is ignored. +# if -d <coordinator_datadir> is provided with utility, it will be prioritiese over other options. def get_coordinatordatadir(): - coordinator_datadir = os.environ.get('COORDINATOR_DATA_DIRECTORY') - if not coordinator_datadir: - coordinator_datadir = os.environ.get('MASTER_DATA_DIRECTORY') + if option_coordinator_datadir is not None: + coordinator_datadir = option_coordinator_datadir + else: + coordinator_datadir = os.environ.get('COORDINATOR_DATA_DIRECTORY') + if not coordinator_datadir: + coordinator_datadir = os.environ.get('MASTER_DATA_DIRECTORY') + if not coordinator_datadir: raise GpError("Environment Variable COORDINATOR_DATA_DIRECTORY not set!") + return coordinator_datadir ###### diff --git a/gpMgmt/bin/gppylib/mainUtils.py b/gpMgmt/bin/gppylib/mainUtils.py index b191c88156..5660fb61a7 100644 --- a/gpMgmt/bin/gppylib/mainUtils.py +++ b/gpMgmt/bin/gppylib/mainUtils.py @@ -267,6 +267,18 @@ def simple_main(createOptionParserFn, createCommandFn, mainOptions=None): def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions): + + """ + if -d <coordinator_data_dir> option is provided in that case doing parsing after creating + lock file would not be a good idea therefore handling -d option before lock. + """ + parser = createOptionParserFn() + (parserOptions, parserArgs) = parser.parse_args() + + if parserOptions.ensure_value("coordinatorDataDirectory", None) is not None: + parserOptions.coordinator_data_directory = os.path.abspath(parserOptions.coordinatorDataDirectory) + gp.set_coordinatordatadir(parserOptions.coordinator_data_directory) + """ If caller specifies 'pidlockpath' in mainOptions then we manage the specified pid file within the COORDINATOR_DATA_DIRECTORY before proceeding @@ -285,13 +297,13 @@ def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions): # at this point we have whatever lock we require try: - simple_main_locked(createOptionParserFn, createCommandFn, mainOptions) + simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions) finally: if sml is not None: sml.release() -def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): +def simple_main_locked(parserOptions, parserArgs, createCommandFn, mainOptions): """ Not to be called externally -- use simple_main instead """ @@ -307,7 +319,6 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): parser = None forceQuiet = mainOptions is not None and mainOptions.get("forceQuietOutput") - options = None if mainOptions is not None and mainOptions.get("programNameOverride"): global gProgramName @@ -323,30 +334,24 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): hostname = unix.getLocalHostname() username = unix.getUserName() - parser = createOptionParserFn() - (options, args) = parser.parse_args() - if useHelperToolLogging: gplog.setup_helper_tool_logging(execname, hostname, username) else: gplog.setup_tool_logging(execname, hostname, username, - logdir=options.ensure_value("logfileDirectory", None), nonuser=nonuser) + logdir=parserOptions.ensure_value("logfileDirectory", None), nonuser=nonuser) if forceQuiet: gplog.quiet_stdout_logging() else: - if options.ensure_value("verbose", False): + if parserOptions.ensure_value("verbose", False): gplog.enable_verbose_logging() - if options.ensure_value("quiet", False): + if parserOptions.ensure_value("quiet", False): gplog.quiet_stdout_logging() - if options.ensure_value("coordinatorDataDirectory", None) is not None: - options.coordinator_data_directory = os.path.abspath(options.coordinatorDataDirectory) - if not suppressStartupLogMessage: logger.info("Starting %s with args: %s" % (gProgramName, ' '.join(sys.argv[1:]))) - commandObject = createCommandFn(options, args) + commandObject = createCommandFn(parserOptions, parserArgs) exitCode = commandObject.run() exit_status = exitCode @@ -368,10 +373,10 @@ def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions): e.cmd.results.stderr)) exit_status = 2 except Exception as e: - if options is None: + if parserOptions is None: logger.exception("%s failed. exiting...", gProgramName) else: - if options.ensure_value("verbose", False): + if parserOptions.ensure_value("verbose", False): logger.exception("%s failed. exiting...", gProgramName) else: logger.fatal("%s failed. (Reason='%s') exiting..." % (gProgramName, e)) diff --git a/gpMgmt/bin/gppylib/system/environment.py b/gpMgmt/bin/gppylib/system/environment.py index 04635aeab5..d6724a9e19 100644 --- a/gpMgmt/bin/gppylib/system/environment.py +++ b/gpMgmt/bin/gppylib/system/environment.py @@ -29,7 +29,8 @@ class GpCoordinatorEnvironment: """ if coordinatorDataDir is None: self.__coordinatorDataDir = gp.get_coordinatordatadir() - else: self.__coordinatorDataDir = coordinatorDataDir + else: + self.__coordinatorDataDir = coordinatorDataDir logger.debug("Obtaining coordinator's port from coordinator data directory") pgconf_dict = pgconf.readfile(self.__coordinatorDataDir + "/postgresql.conf") diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature index 3ba2d27e97..cd9599a42a 100644 --- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature +++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature @@ -230,6 +230,75 @@ Feature: gprecoverseg tests And all the segments are running And the segments are synchronized + Scenario: gprecoverseg runs with given coordinator data directory option + Given the database is running + And all the segments are running + And the segments are synchronized + And user stops all mirror processes + And user can start transactions + And "COORDINATOR_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gprecoverseg" with coordinator data directory and "-F -a" + And gprecoverseg should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + And the segments are synchronized + + Scenario: gprecoverseg priorities given coordinator data directory over env option + Given the database is running + And all the segments are running + And the segments are synchronized + And user stops all mirror processes + And user can start transactions + And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gprecoverseg" with coordinator data directory and "-F -a" + And gprecoverseg should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + And the segments are synchronized + + Scenario: gprecoverseg differential recovery displays rsync progress to the user + Given the database is running + And all the segments are running + And the segments are synchronized + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user stops all mirror processes + When user can start transactions + And the user runs "gprecoverseg --differential -a -s" + Then gprecoverseg should return a return code of 0 + And gprecoverseg should print "Initiating segment recovery. Upon completion, will start the successfully recovered segments" to stdout + And gprecoverseg should print "total size" to stdout for each mirror + And gprecoverseg should print "Segments successfully recovered" to stdout + And gpAdminLogs directory has no "pg_basebackup*" files on all segment hosts + And gpAdminLogs directory has no "pg_rewind*" files on all segment hosts + And gpAdminLogs directory has no "rsync*" files on all segment hosts + And gpAdminLogs directory has "gpsegrecovery*" files + And gpAdminLogs directory has "gpsegsetuprecovery*" files + And all the segments are running + And the segments are synchronized + And verify replication slot internal_wal_replication_slot is available on all the segments + And check segment conf: postgresql.conf + + Scenario: gprecoverseg does not display rsync progress to the user when --no-progress option is specified + Given the database is running + And all the segments are running + And the segments are synchronized + And all files in gpAdminLogs directory are deleted on all hosts in the cluster + And user stops all mirror processes + When user can start transactions + And the user runs "gprecoverseg --differential -a -s --no-progress" + Then gprecoverseg should return a return code of 0 + And gprecoverseg should print "Initiating segment recovery. Upon completion, will start the successfully recovered segments" to stdout + And gprecoverseg should not print "total size is .* speedup is .*" to stdout + And gprecoverseg should print "Segments successfully recovered" to stdout + And gpAdminLogs directory has no "pg_basebackup*" files on all segment hosts + And gpAdminLogs directory has no "pg_rewind*" files on all segment hosts + And gpAdminLogs directory has no "rsync*" files on all segment hosts + And gpAdminLogs directory has "gpsegrecovery*" files + And gpAdminLogs directory has "gpsegsetuprecovery*" files + And all the segments are running + And the segments are synchronized + And check segment conf: postgresql.conf + Scenario: When gprecoverseg incremental recovery uses pg_rewind to recover and an existing postmaster.pid on the killed primary segment corresponds to a non postgres process Given the database is running And all the segments are running diff --git a/gpMgmt/test/behave/mgmt_utils/gpstart.feature b/gpMgmt/test/behave/mgmt_utils/gpstart.feature index 138ae04785..c5c38dc15d 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstart.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstart.feature @@ -29,6 +29,32 @@ Feature: gpstart behave tests And gpstart should return a return code of 0 And all the segments are running + @demo_cluster + Scenario: gpstart runs with given coordinator data directory option + Given the database is running + And running postgres processes are saved in context + And the user runs "gpstop -a" + And gpstop should return a return code of 0 + And verify no postgres process is running on all hosts + And "COORDINATOR_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstart" with coordinator data directory and "-a" + And gpstart should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + + @demo_cluster + Scenario: gpstart priorities given coordinator data directory over env option + Given the database is running + And running postgres processes are saved in context + And the user runs "gpstop -a" + And gpstop should return a return code of 0 + And verify no postgres process is running on all hosts + And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstart" with coordinator data directory and "-a" + And gpstart should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And all the segments are running + @concourse_cluster @demo_cluster Scenario: gpstart starts even if a segment host is unreachable diff --git a/gpMgmt/test/behave/mgmt_utils/gpstate.feature b/gpMgmt/test/behave/mgmt_utils/gpstate.feature index 5cca6ec192..415fcc8eca 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstate.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstate.feature @@ -592,6 +592,55 @@ Feature: gpstate tests And the pg_log files on primary segments should not contain "connections to primary segments are not allowed" And the user drops log_timestamp table + Scenario: gpstate runs with given coordinator data directory option + Given the cluster is generated with "3" primaries only + And "COORDINATOR_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstate" with coordinator data directory and "-a -b" + And gpstate should return a return code of 0 + And gpstate output has rows with keys values + | Coordinator instance = Active | + | Coordinator standby = No coordinator standby configured | + | Total segment instance count from metadata = 3 | + | Primary Segment Status | + | Total primary segments = 3 | + | Total primary segment valid \(at coordinator\) = 3 | + | Total primary segment failures \(at coordinator\) = 0 | + | Total number of postmaster.pid files missing = 0 | + | Total number of postmaster.pid files found = 3 | + | Total number of postmaster.pid PIDs missing = 0 | + | Total number of postmaster.pid PIDs found = 3 | + | Total number of /tmp lock files missing = 0 | + | Total number of /tmp lock files found = 3 | + | Total number postmaster processes missing = 0 | + | Total number postmaster processes found = 3 | + | Mirror Segment Status | + | Mirrors not configured on this array + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + + Scenario: gpstate priorities given coordinator data directory over env option + Given the cluster is generated with "3" primaries only + And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstate" with coordinator data directory and "-a -b" + And gpstate should return a return code of 0 + And gpstate output has rows with keys values + | Coordinator instance = Active | + | Coordinator standby = No coordinator standby configured | + | Total segment instance count from metadata = 3 | + | Primary Segment Status | + | Total primary segments = 3 | + | Total primary segment valid \(at coordinator\) = 3 | + | Total primary segment failures \(at coordinator\) = 0 | + | Total number of postmaster.pid files missing = 0 | + | Total number of postmaster.pid files found = 3 | + | Total number of postmaster.pid PIDs missing = 0 | + | Total number of postmaster.pid PIDs found = 3 | + | Total number of /tmp lock files missing = 0 | + | Total number of /tmp lock files found = 3 | + | Total number postmaster processes missing = 0 | + | Total number postmaster processes found = 3 | + | Mirror Segment Status | + | Mirrors not configured on this array + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored ########################### @concourse_cluster tests ########################### # The @concourse_cluster tag denotes the scenario that requires a remote cluster diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature b/gpMgmt/test/behave/mgmt_utils/gpstop.feature index 7ff5b86ae7..49f6a61b06 100644 --- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature +++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature @@ -8,6 +8,26 @@ Feature: gpstop behave tests When the user runs "gpstop -a" Then gpstop should return a return code of 0 + @demo_cluster + Scenario: gpstop runs with given coordinator data directory option + Given the database is running + And running postgres processes are saved in context + And "COORDINATOR_DATA_DIRECTORY" environment variable is not set + Then the user runs utility "gpstop" with coordinator data directory and "-a" + And gpstop should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And verify no postgres process is running on all hosts + + @demo_cluster + Scenario: gpstop priorities given coordinator data directory over env option + Given the database is running + And running postgres processes are saved in context + And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to "/tmp/" + Then the user runs utility "gpstop" with coordinator data directory and "-a" + And gpstop should return a return code of 0 + And "COORDINATOR_DATA_DIRECTORY" environment variable should be restored + And verify no postgres process is running on all hosts + @concourse_cluster @demo_cluster Scenario: when there are user connections gpstop waits to shutdown until user switches to fast mode diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py index ca7d35f63a..7b112ef29c 100644 --- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py +++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py @@ -1252,6 +1252,15 @@ def impl(context, options): context.execute_steps('''Then the user runs command "gpactivatestandby -a %s" from standby coordinator''' % options) context.standby_was_activated = True + +@given('the user runs utility "{utility}" with coordinator data directory and "{options}"') +@when('the user runs utility "{utility}" with coordinator data directory and "{options}"') +@then('the user runs utility "{utility}" with coordinator data directory and "{options}"') +def impl(context, utility, options): + cmd = "{} -d {} {}".format(utility, coordinator_data_dir, options) + context.execute_steps('''then the user runs command "%s"''' % cmd ) + + @then('gpintsystem logs should {contain} lines about running backout script') def impl(context, contain): string_to_find = 'Run command bash .*backout_gpinitsystem.* on coordinator to remove these changes$' --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
