This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 5adbaa65073511925e191d8d3da83d212f4f4e5f
Author: Rakesh Sharma <[email protected]>
AuthorDate: Wed Sep 20 12:28:25 2023 +0530

    [7X]Fix utilities do not honor -d flag when COORDINATOR_DATA_DIRECTORY is 
not set. (#16433)
    
    * Fix: utilities do not honor -d flag when COORDINATOR_DATA_DIRECTORY is 
not set.
    
    Issue: Following are the gpMgmt utilities that do not honor the -d flag, 
when
    COORDINATOR_DATA_DIRECTORY is not set.
    1. gpstart
    2. gpstop
    3. gpstatus
    4. gprecoverseg
    5. gpaddmirror
    
    RCA: to get the coordinator data directory gp.getcoordinator_datadir()
    function is called from the above-listed utilities. the function does not
    have any provision to return the coordinator data directory which is
    provided with the -d flag. currently, it looks for 
COORDINATOR_DATA_DIRECTORY
    and MASTER_DATA_DIRECTORY env variable.
    
        also in some of the utilities we were creating lock files before
    parsing the provided options which looks like the design flow that was
    causing the utilities to crash when looking for coordinator data
    directory.
    
    Fix: Added a global flag which holds the data directory provided with -d
    option. so when we run the utility and do parsing it sets the flag with
    the provided datadirectory and the same will be returned when we call
    gp.gp.getcoordinator_datadir().
    
    Test:
    Added behave test cases to use the provided data directory if 
COORDINATOR_DATA_DIRECTORY
    is not set.
    Added behave test case to check if the provided coordinator data directory
    is preferred over the already set coordinator_data_dir env variable.
    it is tested by setting a wrong COORDINATOR_DATA_DIRECTORY env variable
    and when we run the utility with the correct data directory using the -d 
option then
    the utility should execute successfully.
---
 gpMgmt/bin/gppylib/commands/gp.py                  | 21 ++++++-
 gpMgmt/bin/gppylib/mainUtils.py                    | 35 ++++++-----
 gpMgmt/bin/gppylib/system/environment.py           |  3 +-
 gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature | 69 ++++++++++++++++++++++
 gpMgmt/test/behave/mgmt_utils/gpstart.feature      | 26 ++++++++
 gpMgmt/test/behave/mgmt_utils/gpstate.feature      | 49 +++++++++++++++
 gpMgmt/test/behave/mgmt_utils/gpstop.feature       | 20 +++++++
 gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py  |  9 +++
 8 files changed, 213 insertions(+), 19 deletions(-)

diff --git a/gpMgmt/bin/gppylib/commands/gp.py 
b/gpMgmt/bin/gppylib/commands/gp.py
index 7a2b3a7bc1..904440b3a0 100644
--- a/gpMgmt/bin/gppylib/commands/gp.py
+++ b/gpMgmt/bin/gppylib/commands/gp.py
@@ -1215,16 +1215,31 @@ def get_gphome():
         raise GpError('Environment Variable GPHOME not set')
     return gphome
 
+'''
+gprecoverseg, gpstart, gpstate, gpstop, gpaddmirror have -d option to give the 
coordinator data directory.
+but its value was not used throughout the utilities. to fix this the best 
possible way is
+to set and retrieve that set coordinator dir when we call 
get_coordinatordatadir().
+'''
+option_coordinator_datadir = None
+def set_coordinatordatadir(coordinator_datadir=None):
+    global option_coordinator_datadir
+    option_coordinator_datadir = coordinator_datadir
 
 ######
 # Support both COORDINATOR_DATA_DIRECTORY and MASTER_DATA_DIRECTORY for 
backwards compatibility.
 # If both are set, the former is used and the latter is ignored.
+# if -d <coordinator_datadir> is provided with utility, it will be prioritiese 
over other options.
 def get_coordinatordatadir():
-    coordinator_datadir = os.environ.get('COORDINATOR_DATA_DIRECTORY')
-    if not coordinator_datadir:
-        coordinator_datadir = os.environ.get('MASTER_DATA_DIRECTORY')
+    if option_coordinator_datadir is not None:
+        coordinator_datadir = option_coordinator_datadir
+    else:
+        coordinator_datadir = os.environ.get('COORDINATOR_DATA_DIRECTORY')
+        if not coordinator_datadir:
+            coordinator_datadir = os.environ.get('MASTER_DATA_DIRECTORY')
+
     if not coordinator_datadir:
         raise GpError("Environment Variable COORDINATOR_DATA_DIRECTORY not 
set!")
+
     return coordinator_datadir
 
 ######
diff --git a/gpMgmt/bin/gppylib/mainUtils.py b/gpMgmt/bin/gppylib/mainUtils.py
index b191c88156..5660fb61a7 100644
--- a/gpMgmt/bin/gppylib/mainUtils.py
+++ b/gpMgmt/bin/gppylib/mainUtils.py
@@ -267,6 +267,18 @@ def simple_main(createOptionParserFn, createCommandFn, 
mainOptions=None):
 
 
 def simple_main_internal(createOptionParserFn, createCommandFn, mainOptions):
+
+    """
+    if -d <coordinator_data_dir> option is provided in that case doing parsing 
after creating
+    lock file would not be a good idea therefore handling -d option before 
lock.
+    """
+    parser = createOptionParserFn()
+    (parserOptions, parserArgs) = parser.parse_args()
+
+    if parserOptions.ensure_value("coordinatorDataDirectory", None) is not 
None:
+        parserOptions.coordinator_data_directory = 
os.path.abspath(parserOptions.coordinatorDataDirectory)
+        gp.set_coordinatordatadir(parserOptions.coordinator_data_directory)
+
     """
     If caller specifies 'pidlockpath' in mainOptions then we manage the
     specified pid file within the COORDINATOR_DATA_DIRECTORY before proceeding
@@ -285,13 +297,13 @@ def simple_main_internal(createOptionParserFn, 
createCommandFn, mainOptions):
 
     # at this point we have whatever lock we require
     try:
-        simple_main_locked(createOptionParserFn, createCommandFn, mainOptions)
+        simple_main_locked(parserOptions, parserArgs, createCommandFn, 
mainOptions)
     finally:
         if sml is not None:
             sml.release()
 
 
-def simple_main_locked(createOptionParserFn, createCommandFn, mainOptions):
+def simple_main_locked(parserOptions, parserArgs, createCommandFn, 
mainOptions):
     """
     Not to be called externally -- use simple_main instead
     """
@@ -307,7 +319,6 @@ def simple_main_locked(createOptionParserFn, 
createCommandFn, mainOptions):
     parser = None
 
     forceQuiet = mainOptions is not None and 
mainOptions.get("forceQuietOutput")
-    options = None
 
     if mainOptions is not None and mainOptions.get("programNameOverride"):
         global gProgramName
@@ -323,30 +334,24 @@ def simple_main_locked(createOptionParserFn, 
createCommandFn, mainOptions):
         hostname = unix.getLocalHostname()
         username = unix.getUserName()
 
-        parser = createOptionParserFn()
-        (options, args) = parser.parse_args()
-
         if useHelperToolLogging:
             gplog.setup_helper_tool_logging(execname, hostname, username)
         else:
             gplog.setup_tool_logging(execname, hostname, username,
-                                     
logdir=options.ensure_value("logfileDirectory", None), nonuser=nonuser)
+                                     
logdir=parserOptions.ensure_value("logfileDirectory", None), nonuser=nonuser)
 
         if forceQuiet:
             gplog.quiet_stdout_logging()
         else:
-            if options.ensure_value("verbose", False):
+            if parserOptions.ensure_value("verbose", False):
                 gplog.enable_verbose_logging()
-            if options.ensure_value("quiet", False):
+            if parserOptions.ensure_value("quiet", False):
                 gplog.quiet_stdout_logging()
 
-        if options.ensure_value("coordinatorDataDirectory", None) is not None:
-            options.coordinator_data_directory = 
os.path.abspath(options.coordinatorDataDirectory)
-
         if not suppressStartupLogMessage:
             logger.info("Starting %s with args: %s" % (gProgramName, ' 
'.join(sys.argv[1:])))
 
-        commandObject = createCommandFn(options, args)
+        commandObject = createCommandFn(parserOptions, parserArgs)
         exitCode = commandObject.run()
         exit_status = exitCode
 
@@ -368,10 +373,10 @@ def simple_main_locked(createOptionParserFn, 
createCommandFn, mainOptions):
                       e.cmd.results.stderr))
         exit_status = 2
     except Exception as e:
-        if options is None:
+        if parserOptions is None:
             logger.exception("%s failed.  exiting...", gProgramName)
         else:
-            if options.ensure_value("verbose", False):
+            if parserOptions.ensure_value("verbose", False):
                 logger.exception("%s failed.  exiting...", gProgramName)
             else:
                 logger.fatal("%s failed. (Reason='%s') exiting..." % 
(gProgramName, e))
diff --git a/gpMgmt/bin/gppylib/system/environment.py 
b/gpMgmt/bin/gppylib/system/environment.py
index 04635aeab5..d6724a9e19 100644
--- a/gpMgmt/bin/gppylib/system/environment.py
+++ b/gpMgmt/bin/gppylib/system/environment.py
@@ -29,7 +29,8 @@ class GpCoordinatorEnvironment:
         """
         if coordinatorDataDir is None:
             self.__coordinatorDataDir = gp.get_coordinatordatadir()
-        else: self.__coordinatorDataDir = coordinatorDataDir
+        else:
+            self.__coordinatorDataDir = coordinatorDataDir
 
         logger.debug("Obtaining coordinator's port from coordinator data 
directory")
         pgconf_dict = pgconf.readfile(self.__coordinatorDataDir + 
"/postgresql.conf")
diff --git a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature 
b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature
index 3ba2d27e97..cd9599a42a 100644
--- a/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gprecoverseg.feature
@@ -230,6 +230,75 @@ Feature: gprecoverseg tests
         And all the segments are running
         And the segments are synchronized
 
+    Scenario: gprecoverseg runs with given coordinator data directory option
+        Given the database is running
+          And all the segments are running
+          And the segments are synchronized
+          And user stops all mirror processes
+          And user can start transactions
+          And "COORDINATOR_DATA_DIRECTORY" environment variable is not set
+         Then the user runs utility "gprecoverseg" with coordinator data 
directory and "-F -a"
+          And gprecoverseg should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And all the segments are running
+          And the segments are synchronized
+
+    Scenario: gprecoverseg priorities given coordinator data directory over 
env option
+        Given the database is running
+          And all the segments are running
+          And the segments are synchronized
+          And user stops all mirror processes
+          And user can start transactions
+          And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to 
"/tmp/"
+         Then the user runs utility "gprecoverseg" with coordinator data 
directory and "-F -a"
+          And gprecoverseg should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And all the segments are running
+          And the segments are synchronized
+
+    Scenario: gprecoverseg differential recovery displays rsync progress to 
the user
+        Given the database is running
+        And all the segments are running
+        And the segments are synchronized
+        And all files in gpAdminLogs directory are deleted on all hosts in the 
cluster
+        And user stops all mirror processes
+        When user can start transactions
+        And the user runs "gprecoverseg --differential -a -s"
+        Then gprecoverseg should return a return code of 0
+        And gprecoverseg should print "Initiating segment recovery. Upon 
completion, will start the successfully recovered segments" to stdout
+        And gprecoverseg should print "total size" to stdout for each mirror
+        And gprecoverseg should print "Segments successfully recovered" to 
stdout
+        And gpAdminLogs directory has no "pg_basebackup*" files on all segment 
hosts
+        And gpAdminLogs directory has no "pg_rewind*" files on all segment 
hosts
+        And gpAdminLogs directory has no "rsync*" files on all segment hosts
+        And gpAdminLogs directory has "gpsegrecovery*" files
+        And gpAdminLogs directory has "gpsegsetuprecovery*" files
+        And all the segments are running
+        And the segments are synchronized
+        And verify replication slot internal_wal_replication_slot is available 
on all the segments
+        And check segment conf: postgresql.conf
+
+    Scenario: gprecoverseg does not display rsync progress to the user when 
--no-progress option is specified
+        Given the database is running
+        And all the segments are running
+        And the segments are synchronized
+        And all files in gpAdminLogs directory are deleted on all hosts in the 
cluster
+        And user stops all mirror processes
+        When user can start transactions
+        And the user runs "gprecoverseg --differential -a -s --no-progress"
+        Then gprecoverseg should return a return code of 0
+        And gprecoverseg should print "Initiating segment recovery. Upon 
completion, will start the successfully recovered segments" to stdout
+        And gprecoverseg should not print "total size is .*  speedup is .*" to 
stdout
+        And gprecoverseg should print "Segments successfully recovered" to 
stdout
+        And gpAdminLogs directory has no "pg_basebackup*" files on all segment 
hosts
+        And gpAdminLogs directory has no "pg_rewind*" files on all segment 
hosts
+        And gpAdminLogs directory has no "rsync*" files on all segment hosts
+        And gpAdminLogs directory has "gpsegrecovery*" files
+        And gpAdminLogs directory has "gpsegsetuprecovery*" files
+        And all the segments are running
+        And the segments are synchronized
+        And check segment conf: postgresql.conf
+
     Scenario: When gprecoverseg incremental recovery uses pg_rewind to recover 
and an existing postmaster.pid on the killed primary segment corresponds to a 
non postgres process
         Given the database is running
         And all the segments are running
diff --git a/gpMgmt/test/behave/mgmt_utils/gpstart.feature 
b/gpMgmt/test/behave/mgmt_utils/gpstart.feature
index 138ae04785..c5c38dc15d 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpstart.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpstart.feature
@@ -29,6 +29,32 @@ Feature: gpstart behave tests
           And gpstart should return a return code of 0
           And all the segments are running
 
+    @demo_cluster
+    Scenario: gpstart runs with given coordinator data directory option
+        Given the database is running
+          And running postgres processes are saved in context
+          And the user runs "gpstop -a"
+          And gpstop should return a return code of 0
+          And verify no postgres process is running on all hosts
+          And "COORDINATOR_DATA_DIRECTORY" environment variable is not set
+         Then the user runs utility "gpstart" with coordinator data directory 
and "-a"
+          And gpstart should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And all the segments are running
+
+    @demo_cluster
+    Scenario: gpstart priorities given coordinator data directory over env 
option
+        Given the database is running
+          And running postgres processes are saved in context
+          And the user runs "gpstop -a"
+          And gpstop should return a return code of 0
+          And verify no postgres process is running on all hosts
+          And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to 
"/tmp/"
+         Then the user runs utility "gpstart" with coordinator data directory 
and "-a"
+          And gpstart should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And all the segments are running
+
     @concourse_cluster
     @demo_cluster
     Scenario: gpstart starts even if a segment host is unreachable
diff --git a/gpMgmt/test/behave/mgmt_utils/gpstate.feature 
b/gpMgmt/test/behave/mgmt_utils/gpstate.feature
index 5cca6ec192..415fcc8eca 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpstate.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpstate.feature
@@ -592,6 +592,55 @@ Feature: gpstate tests
         And the pg_log files on primary segments should not contain 
"connections to primary segments are not allowed"
         And the user drops log_timestamp table
 
+    Scenario: gpstate runs with given coordinator data directory option
+        Given the cluster is generated with "3" primaries only
+         And "COORDINATOR_DATA_DIRECTORY" environment variable is not set
+        Then the user runs utility "gpstate" with coordinator data directory 
and "-a -b"
+         And gpstate should return a return code of 0
+         And gpstate output has rows with keys values
+            | Coordinator instance                              = Active       
                     |
+            | Coordinator standby                               = No 
coordinator standby configured |
+            | Total segment instance count from metadata        = 3            
                     |
+            | Primary Segment Status                                           
                     |
+            | Total primary segments                            = 3            
                     |
+            | Total primary segment valid \(at coordinator\)    = 3            
                     |
+            | Total primary segment failures \(at coordinator\) = 0            
                     |
+            | Total number of postmaster.pid files missing      = 0            
                     |
+            | Total number of postmaster.pid files found        = 3            
                     |
+            | Total number of postmaster.pid PIDs missing       = 0            
                     |
+            | Total number of postmaster.pid PIDs found         = 3            
                     |
+            | Total number of /tmp lock files missing           = 0            
                     |
+            | Total number of /tmp lock files found             = 3            
                     |
+            | Total number postmaster processes missing         = 0            
                     |
+            | Total number postmaster processes found           = 3            
                     |
+            | Mirror Segment Status                                            
                     |
+            | Mirrors not configured on this array
+         And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+
+    Scenario: gpstate priorities given coordinator data directory over env 
option
+        Given the cluster is generated with "3" primaries only
+          And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to 
"/tmp/"
+        Then the user runs utility "gpstate" with coordinator data directory 
and "-a -b"
+         And gpstate should return a return code of 0
+         And gpstate output has rows with keys values
+            | Coordinator instance                              = Active       
                     |
+            | Coordinator standby                               = No 
coordinator standby configured |
+            | Total segment instance count from metadata        = 3            
                     |
+            | Primary Segment Status                                           
                     |
+            | Total primary segments                            = 3            
                     |
+            | Total primary segment valid \(at coordinator\)    = 3            
                     |
+            | Total primary segment failures \(at coordinator\) = 0            
                     |
+            | Total number of postmaster.pid files missing      = 0            
                     |
+            | Total number of postmaster.pid files found        = 3            
                     |
+            | Total number of postmaster.pid PIDs missing       = 0            
                     |
+            | Total number of postmaster.pid PIDs found         = 3            
                     |
+            | Total number of /tmp lock files missing           = 0            
                     |
+            | Total number of /tmp lock files found             = 3            
                     |
+            | Total number postmaster processes missing         = 0            
                     |
+            | Total number postmaster processes found           = 3            
                     |
+            | Mirror Segment Status                                            
                     |
+            | Mirrors not configured on this array
+        And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
 
 ########################### @concourse_cluster tests 
###########################
 # The @concourse_cluster tag denotes the scenario that requires a remote 
cluster
diff --git a/gpMgmt/test/behave/mgmt_utils/gpstop.feature 
b/gpMgmt/test/behave/mgmt_utils/gpstop.feature
index 7ff5b86ae7..49f6a61b06 100644
--- a/gpMgmt/test/behave/mgmt_utils/gpstop.feature
+++ b/gpMgmt/test/behave/mgmt_utils/gpstop.feature
@@ -8,6 +8,26 @@ Feature: gpstop behave tests
          When the user runs "gpstop -a"
          Then gpstop should return a return code of 0
 
+    @demo_cluster
+    Scenario: gpstop runs with given coordinator data directory option
+        Given the database is running
+          And running postgres processes are saved in context
+          And "COORDINATOR_DATA_DIRECTORY" environment variable is not set
+         Then the user runs utility "gpstop" with coordinator data directory 
and "-a"
+          And gpstop should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And verify no postgres process is running on all hosts
+
+    @demo_cluster
+    Scenario: gpstop priorities given coordinator data directory over env 
option
+        Given the database is running
+          And running postgres processes are saved in context
+          And the environment variable "COORDINATOR_DATA_DIRECTORY" is set to 
"/tmp/"
+         Then the user runs utility "gpstop" with coordinator data directory 
and "-a"
+          And gpstop should return a return code of 0
+          And "COORDINATOR_DATA_DIRECTORY" environment variable should be 
restored
+          And verify no postgres process is running on all hosts
+
     @concourse_cluster
     @demo_cluster
     Scenario: when there are user connections gpstop waits to shutdown until 
user switches to fast mode
diff --git a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py 
b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
index ca7d35f63a..7b112ef29c 100644
--- a/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
+++ b/gpMgmt/test/behave/mgmt_utils/steps/mgmt_utils.py
@@ -1252,6 +1252,15 @@ def impl(context, options):
     context.execute_steps('''Then the user runs command "gpactivatestandby -a 
%s" from standby coordinator''' % options)
     context.standby_was_activated = True
 
+
+@given('the user runs utility "{utility}" with coordinator data directory and 
"{options}"')
+@when('the user runs utility "{utility}" with coordinator data directory and 
"{options}"')
+@then('the user runs utility "{utility}" with coordinator data directory and 
"{options}"')
+def impl(context, utility, options):
+    cmd = "{} -d {} {}".format(utility, coordinator_data_dir, options)
+    context.execute_steps('''then the user runs command "%s"''' % cmd )
+
+
 @then('gpintsystem logs should {contain} lines about running backout script')
 def impl(context, contain):
     string_to_find = 'Run command bash .*backout_gpinitsystem.* on coordinator 
to remove these changes$'


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to