This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit c4e0a80cd9d7ce9cc2a3d4f7e2da9438219cb354 Author: RMT <d.r...@outlook.com> AuthorDate: Tue Dec 12 22:49:00 2023 +0800 delete cgroup leaf dir only when use group-v2 (#16830) Delete the cgroup leaf dir only when group-v2 is used. There is no leaf directory in the gpdb cgroup when cgroup v1 is used, so rmdir(leaf_path) always returns a non-zero value and, because the two calls are joined with a short-circuiting `||`, rmdir(path) is never executed. As a result, when a resource group is dropped, its corresponding cgroup dir is not removed, and this causes CI failures. This commit adds logic to deleteDir to check the resource group version: when group-v1 is used, rmdir(leaf_path) is skipped. --- src/backend/utils/resgroup/cgroup.c | 58 +++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index 839090f39e4..504c8c2fcba 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -466,16 +466,20 @@ bool deleteDir(Oid group, CGroupComponentType component, const char *filename, bool unassign, void (*detachcgroup) (Oid group, CGroupComponentType component, int fd_dir)) { - char path[MAX_CGROUP_PATHLEN]; char leaf_path[MAX_CGROUP_PATHLEN]; size_t path_size = sizeof(path); + bool is_v2 = Gp_resource_manager_policy == RESOURCE_MANAGER_POLICY_GROUP_V2; + int path_cnt = 2; + char *paths[2] = {leaf_path, path}; int retry = unassign ? 
0 : MAX_RETRY - 1; int fd_dir; + int i; buildPath(group, BASEDIR_GPDB, component, "", path, path_size); - buildPath(group, BASEDIR_GPDB, component, CGROUPV2_LEAF_INDENTIFIER, leaf_path, path_size); + if (is_v2) + buildPath(group, BASEDIR_GPDB, component, CGROUPV2_LEAF_INDENTIFIER, leaf_path, path_size); /* * To prevent race condition between multiple processes we require a dir @@ -494,37 +498,47 @@ deleteDir(Oid group, CGroupComponentType component, const char *filename, bool u if (filename) writeInt64(group, BASEDIR_GPDB, component, filename, 0); + if (!unassign) + detachcgroup = NULL; + + i = is_v2 ? 0 : 1; while (++retry <= MAX_RETRY) { - if (unassign) + if (detachcgroup) detachcgroup(group, component, fd_dir); - if (rmdir(leaf_path) || rmdir(path)) + for (; i < path_cnt; ++i) { - int err = errno; - - if (err == EBUSY && unassign && retry < MAX_RETRY) + if (rmdir(paths[i])) { - elog(DEBUG1, "can't remove dir, will retry: %s: %s", - path, strerror(err)); - pg_usleep(1000); - continue; + int err = errno; + + if (err == EBUSY && unassign && retry < MAX_RETRY) + { + elog(DEBUG1, "can't remove dir, will retry: %s: %s", + paths[i], strerror(err)); + pg_usleep(1000); + break; + } + + if (err != ENOENT) + { + elog(DEBUG1, "can't remove dir, ignore the error: %s: %s", + paths[i], strerror(err)); + goto error; + } } - /* - * we don't check for ENOENT again as we already acquired the lock - * on this dir and the dir still exist at that time, so if then - * it's removed by other processes then it's a bug. 
- */ - elog(DEBUG1, "can't remove dir, ignore the error: %s: %s", - path, strerror(err)); + detachcgroup = NULL; + + elog(DEBUG1, "cgroup dir '%s' removed", paths[i]); } - break; - } - if (retry <= MAX_RETRY) - elog(DEBUG1, "cgroup dir '%s' removed", path); + if (i >= path_cnt) + break; + } +error: /* close() also releases the lock */ close(fd_dir); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org