This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


The following commit(s) were added to refs/heads/main by this push:
     new 57063e1bf38 Fix gpconfig for resource group v2 on segment hosts (#1507)
57063e1bf38 is described below

commit 57063e1bf38469befd5996a5935dda2e1888db68
Author: zhangyue <[email protected]>
AuthorDate: Thu Jan 1 15:49:48 2026 +0800

    Fix gpconfig for resource group v2 on segment hosts (#1507)
    
    The gpcheckresgroupv2impl script failed on segments when running
    `gpconfig -c gp_resource_manager -v "group-v2"`.
    
    Root cause:
    The validation script tried to connect to localhost:5432 on each host
    to retrieve gp_resource_group_cgroup_parent. However, segment hosts
    don't run the master database, so the connection attempt failed with
    "Connection refused" errors.
    
    Fix:
    - Retrieve gp_resource_group_cgroup_parent from the master database in
      gpresgroup.py before dispatching validation commands.
    - Pass the cgroup_parent value to gpcheckresgroupv2impl via a
      command-line argument (--cgroup-parent).
    - Remove the database connection logic from gpcheckresgroupv2impl.
---
 gpMgmt/bin/gpcheckresgroupv2impl | 46 +++++++++++-----------------------------
 gpMgmt/bin/gppylib/gpresgroup.py | 30 +++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/gpMgmt/bin/gpcheckresgroupv2impl b/gpMgmt/bin/gpcheckresgroupv2impl
index b71d2562628..e52931f7a1f 100755
--- a/gpMgmt/bin/gpcheckresgroupv2impl
+++ b/gpMgmt/bin/gpcheckresgroupv2impl
@@ -3,16 +3,9 @@
 
 import os
 import sys
+import argparse
 from functools import reduce
 
-# Add the gppylib path to sys.path to import database connection modules
-try:
-    from gppylib.db import dbconn
-    from pg import DatabaseError
-except ImportError as err:
-    sys.exit('Cannot import modules. Please check that you have sourced '
-             'cloudberry-env.sh. Detail: ' + str(err))
-
 
 class ValidationException(Exception):
     def __init__(self, message):
@@ -35,10 +28,10 @@ class CgroupValidation(object):
 
 
 class CgroupValidationVersionTwo(CgroupValidation):
-    def __init__(self):
+    def __init__(self, cgroup_parent=None):
         self.mount_point = self.detect_cgroup_mount_point()
         self.tab = {"r": os.R_OK, "w": os.W_OK, "x": os.X_OK, "f": os.F_OK}
-        self.cgroup_parent = self.get_cgroup_parent()
+        self.cgroup_parent = cgroup_parent if cgroup_parent else "gpdb.service"
 
     def validate_all(self):
         """
@@ -71,29 +64,6 @@ class CgroupValidationVersionTwo(CgroupValidation):
 
         self.validate_permission(self.cgroup_parent + "/io.max", "rw")
 
-    def get_cgroup_parent(self):
-        """
-        Get the cgroup parent directory from the database GUC parameter
-        gp_resource_group_cgroup_parent. If unable to connect to database
-        or retrieve the parameter, report error using die function.
-        """
-        try:
-            dburl = dbconn.DbURL()
-
-            with dbconn.connect(dburl, utility=True) as conn:
-                # Query the GUC parameter value
-                sql = "SHOW gp_resource_group_cgroup_parent"
-                cursor = dbconn.query(conn, sql)
-                result = cursor.fetchone()
-
-                if result and result[0]:
-                    return result[0]
-                else:
-                    self.die("failed to retrieve gp_resource_group_cgroup_parent parameter from database")
- 
-        except Exception as e:
-            self.die("failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e)))
-
     def die(self, msg):
         raise ValidationException("cgroup is not properly configured: {}".format(msg))
 
@@ -118,7 +88,15 @@ class CgroupValidationVersionTwo(CgroupValidation):
 
 
 if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Validate cgroup v2 configuration for resource groups')
+    parser.add_argument('--cgroup-parent',
+                        dest='cgroup_parent',
+                        default=None,
+                        help='The cgroup parent directory name (gp_resource_group_cgroup_parent value)')
+
+    args = parser.parse_args()
+
     try:
-        CgroupValidationVersionTwo().validate_all()
+        CgroupValidationVersionTwo(cgroup_parent=args.cgroup_parent).validate_all()
     except ValidationException as e:
         exit(e.message)
diff --git a/gpMgmt/bin/gppylib/gpresgroup.py b/gpMgmt/bin/gppylib/gpresgroup.py
index e66e44e7ffd..7c36659c277 100644
--- a/gpMgmt/bin/gppylib/gpresgroup.py
+++ b/gpMgmt/bin/gppylib/gpresgroup.py
@@ -8,6 +8,7 @@ from gppylib.commands.unix import *
 from gppylib.commands.gp import *
 from gppylib.gparray import GpArray
 from gppylib.gplog import get_default_logger
+from gppylib.db import dbconn
 
 
 class GpResGroup(object):
@@ -40,13 +41,40 @@ class GpResGroup(object):
 
     @staticmethod
     def validate_v2():
+        """
+        Validate cgroup v2 configuration on all hosts.
+
+        This method:
+        1. Connects to the master database to retrieve gp_resource_group_cgroup_parent
+        2. Passes this value to gpcheckresgroupv2impl on each host via command line
+        3. Each host validates its local cgroup filesystem permissions
+        """
         pool = base.WorkerPool()
         gp_array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
         host_list = list(set(gp_array.get_hostlist(True)))
         msg = None
 
+        # Get cgroup_parent value from master database
+        cgroup_parent = None
+        try:
+            # Connect to master database to get the GUC parameter
+            master_dburl = dbconn.DbURL()
+            with dbconn.connect(master_dburl, utility=True) as conn:
+                sql = "SHOW gp_resource_group_cgroup_parent"
+                cursor = dbconn.query(conn, sql)
+                result = cursor.fetchone()
+                if result and result[0]:
+                    cgroup_parent = result[0]
+                else:
+                    return "failed to retrieve gp_resource_group_cgroup_parent parameter from master database"
+        except Exception as e:
+            return "failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e))
+
+        # Build command with cgroup_parent parameter
+        cmd_str = "gpcheckresgroupv2impl --cgroup-parent '{}'".format(cgroup_parent)
+
         for h in host_list:
-            cmd = Command(h, "gpcheckresgroupv2impl", REMOTE, h)
+            cmd = Command(h, cmd_str, REMOTE, h)
             pool.addCommand(cmd)
         pool.join()
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to