Skip to content

Commit c4e0a80

Browse files
RMTT authored and my-ship-it committed
Delete cgroup leaf dir only when using group-v2 (#16830)
Delete the cgroup leaf dir only when using group-v2. There is no leaf directory in the gpdb cgroup when cgroup v1 is used, so rmdir(leaf_path) always returns a non-zero value; because the two calls are joined with a short-circuiting `||`, rmdir(path) is then never executed. As a result, when a resource group is dropped, its corresponding cgroup dir is not removed, and this causes CI failures. This commit adds logic to deleteDir that checks the resource group version: when group-v1 is in use, rmdir(leaf_path) is skipped.
1 parent 723ee28 commit c4e0a80

1 file changed

Lines changed: 36 additions & 22 deletions

File tree

src/backend/utils/resgroup/cgroup.c

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -466,16 +466,20 @@ bool
466466
deleteDir(Oid group, CGroupComponentType component, const char *filename, bool unassign,
467467
void (*detachcgroup) (Oid group, CGroupComponentType component, int fd_dir))
468468
{
469-
470469
char path[MAX_CGROUP_PATHLEN];
471470
char leaf_path[MAX_CGROUP_PATHLEN];
472471
size_t path_size = sizeof(path);
473472

473+
bool is_v2 = Gp_resource_manager_policy == RESOURCE_MANAGER_POLICY_GROUP_V2;
474+
int path_cnt = 2;
475+
char *paths[2] = {leaf_path, path};
474476
int retry = unassign ? 0 : MAX_RETRY - 1;
475477
int fd_dir;
478+
int i;
476479

477480
buildPath(group, BASEDIR_GPDB, component, "", path, path_size);
478-
buildPath(group, BASEDIR_GPDB, component, CGROUPV2_LEAF_INDENTIFIER, leaf_path, path_size);
481+
if (is_v2)
482+
buildPath(group, BASEDIR_GPDB, component, CGROUPV2_LEAF_INDENTIFIER, leaf_path, path_size);
479483

480484
/*
481485
* To prevent race condition between multiple processes we require a dir
@@ -494,37 +498,47 @@ deleteDir(Oid group, CGroupComponentType component, const char *filename, bool u
494498
if (filename)
495499
writeInt64(group, BASEDIR_GPDB, component, filename, 0);
496500

501+
if (!unassign)
502+
detachcgroup = NULL;
503+
504+
i = is_v2 ? 0 : 1;
497505
while (++retry <= MAX_RETRY)
498506
{
499-
if (unassign)
507+
if (detachcgroup)
500508
detachcgroup(group, component, fd_dir);
501509

502-
if (rmdir(leaf_path) || rmdir(path))
510+
for (; i < path_cnt; ++i)
503511
{
504-
int err = errno;
505-
506-
if (err == EBUSY && unassign && retry < MAX_RETRY)
512+
if (rmdir(paths[i]))
507513
{
508-
elog(DEBUG1, "can't remove dir, will retry: %s: %s",
509-
path, strerror(err));
510-
pg_usleep(1000);
511-
continue;
514+
int err = errno;
515+
516+
if (err == EBUSY && unassign && retry < MAX_RETRY)
517+
{
518+
elog(DEBUG1, "can't remove dir, will retry: %s: %s",
519+
paths[i], strerror(err));
520+
pg_usleep(1000);
521+
break;
522+
}
523+
524+
if (err != ENOENT)
525+
{
526+
elog(DEBUG1, "can't remove dir, ignore the error: %s: %s",
527+
paths[i], strerror(err));
528+
goto error;
529+
}
512530
}
513531

514-
/*
515-
* we don't check for ENOENT again as we already acquired the lock
516-
* on this dir and the dir still exist at that time, so if then
517-
* it's removed by other processes then it's a bug.
518-
*/
519-
elog(DEBUG1, "can't remove dir, ignore the error: %s: %s",
520-
path, strerror(err));
532+
detachcgroup = NULL;
533+
534+
elog(DEBUG1, "cgroup dir '%s' removed", paths[i]);
521535
}
522-
break;
523-
}
524536

525-
if (retry <= MAX_RETRY)
526-
elog(DEBUG1, "cgroup dir '%s' removed", path);
537+
if (i >= path_cnt)
538+
break;
539+
}
527540

541+
error:
528542
/* close() also releases the lock */
529543
close(fd_dir);
530544

0 commit comments

Comments
 (0)