This is an automated email from the ASF dual-hosted git repository.
reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new 57063e1bf38 Fix gpconfig for resource group v2 on segment hosts (#1507)
57063e1bf38 is described below
commit 57063e1bf38469befd5996a5935dda2e1888db68
Author: zhangyue <[email protected]>
AuthorDate: Thu Jan 1 15:49:48 2026 +0800
Fix gpconfig for resource group v2 on segment hosts (#1507)
The gpcheckresgroupv2impl script failed on segment hosts when running
`gpconfig -c gp_resource_manager -v "group-v2"`.
Root cause:
The validation script tried to connect to localhost:5432 on each host
to retrieve gp_resource_group_cgroup_parent. However, segment hosts
don't run the master database, so the connection attempt failed with
"Connection refused" errors.
Fix:
- Retrieve gp_resource_group_cgroup_parent from the master database in
gpresgroup.py before dispatching validation commands.
- Pass the cgroup_parent value to gpcheckresgroupv2impl via a
command-line argument (--cgroup-parent).
- Remove the database connection logic from gpcheckresgroupv2impl.
---
gpMgmt/bin/gpcheckresgroupv2impl | 46 +++++++++++-----------------------------
gpMgmt/bin/gppylib/gpresgroup.py | 30 +++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/gpMgmt/bin/gpcheckresgroupv2impl b/gpMgmt/bin/gpcheckresgroupv2impl
index b71d2562628..e52931f7a1f 100755
--- a/gpMgmt/bin/gpcheckresgroupv2impl
+++ b/gpMgmt/bin/gpcheckresgroupv2impl
@@ -3,16 +3,9 @@
import os
import sys
+import argparse
from functools import reduce
-# Add the gppylib path to sys.path to import database connection modules
-try:
- from gppylib.db import dbconn
- from pg import DatabaseError
-except ImportError as err:
- sys.exit('Cannot import modules. Please check that you have sourced '
- 'cloudberry-env.sh. Detail: ' + str(err))
-
class ValidationException(Exception):
def __init__(self, message):
@@ -35,10 +28,10 @@ class CgroupValidation(object):
class CgroupValidationVersionTwo(CgroupValidation):
- def __init__(self):
+ def __init__(self, cgroup_parent=None):
self.mount_point = self.detect_cgroup_mount_point()
self.tab = {"r": os.R_OK, "w": os.W_OK, "x": os.X_OK, "f": os.F_OK}
- self.cgroup_parent = self.get_cgroup_parent()
+ self.cgroup_parent = cgroup_parent if cgroup_parent else "gpdb.service"
def validate_all(self):
"""
@@ -71,29 +64,6 @@ class CgroupValidationVersionTwo(CgroupValidation):
self.validate_permission(self.cgroup_parent + "/io.max", "rw")
- def get_cgroup_parent(self):
- """
- Get the cgroup parent directory from the database GUC parameter
- gp_resource_group_cgroup_parent. If unable to connect to database
- or retrieve the parameter, report error using die function.
- """
- try:
- dburl = dbconn.DbURL()
-
- with dbconn.connect(dburl, utility=True) as conn:
- # Query the GUC parameter value
- sql = "SHOW gp_resource_group_cgroup_parent"
- cursor = dbconn.query(conn, sql)
- result = cursor.fetchone()
-
- if result and result[0]:
- return result[0]
- else:
- self.die("failed to retrieve gp_resource_group_cgroup_parent parameter from database")
-
- except Exception as e:
- self.die("failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e)))
-
def die(self, msg):
raise ValidationException("cgroup is not properly configured: {}".format(msg))
@@ -118,7 +88,15 @@ class CgroupValidationVersionTwo(CgroupValidation):
if __name__ == '__main__':
+ parser = argparse.ArgumentParser(description='Validate cgroup v2 configuration for resource groups')
+ parser.add_argument('--cgroup-parent',
+ dest='cgroup_parent',
+ default=None,
+ help='The cgroup parent directory name (gp_resource_group_cgroup_parent value)')
+
+ args = parser.parse_args()
+
try:
- CgroupValidationVersionTwo().validate_all()
+ CgroupValidationVersionTwo(cgroup_parent=args.cgroup_parent).validate_all()
except ValidationException as e:
exit(e.message)
diff --git a/gpMgmt/bin/gppylib/gpresgroup.py b/gpMgmt/bin/gppylib/gpresgroup.py
index e66e44e7ffd..7c36659c277 100644
--- a/gpMgmt/bin/gppylib/gpresgroup.py
+++ b/gpMgmt/bin/gppylib/gpresgroup.py
@@ -8,6 +8,7 @@ from gppylib.commands.unix import *
from gppylib.commands.gp import *
from gppylib.gparray import GpArray
from gppylib.gplog import get_default_logger
+from gppylib.db import dbconn
class GpResGroup(object):
@@ -40,13 +41,40 @@ class GpResGroup(object):
@staticmethod
def validate_v2():
+ """
+ Validate cgroup v2 configuration on all hosts.
+
+ This method:
+ 1. Connects to the master database to retrieve gp_resource_group_cgroup_parent
+ 2. Passes this value to gpcheckresgroupv2impl on each host via command line
+ 3. Each host validates its local cgroup filesystem permissions
+ """
pool = base.WorkerPool()
gp_array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
host_list = list(set(gp_array.get_hostlist(True)))
msg = None
+ # Get cgroup_parent value from master database
+ cgroup_parent = None
+ try:
+ # Connect to master database to get the GUC parameter
+ master_dburl = dbconn.DbURL()
+ with dbconn.connect(master_dburl, utility=True) as conn:
+ sql = "SHOW gp_resource_group_cgroup_parent"
+ cursor = dbconn.query(conn, sql)
+ result = cursor.fetchone()
+ if result and result[0]:
+ cgroup_parent = result[0]
+ else:
+ return "failed to retrieve gp_resource_group_cgroup_parent parameter from master database"
+ except Exception as e:
+ return "failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e))
+
+ # Build command with cgroup_parent parameter
+ cmd_str = "gpcheckresgroupv2impl --cgroup-parent '{}'".format(cgroup_parent)
+
for h in host_list:
- cmd = Command(h, "gpcheckresgroupv2impl", REMOTE, h)
+ cmd = Command(h, cmd_str, REMOTE, h)
pool.addCommand(cmd)
pool.join()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]