Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 12 additions & 34 deletions gpMgmt/bin/gpcheckresgroupv2impl
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,9 @@

import os
import sys
import argparse
from functools import reduce

# Add the gppylib path to sys.path to import database connection modules
try:
from gppylib.db import dbconn
from pg import DatabaseError
except ImportError as err:
sys.exit('Cannot import modules. Please check that you have sourced '
'cloudberry-env.sh. Detail: ' + str(err))


class ValidationException(Exception):
def __init__(self, message):
Expand All @@ -35,10 +28,10 @@ class CgroupValidation(object):


class CgroupValidationVersionTwo(CgroupValidation):
def __init__(self, cgroup_parent=None):
    """
    Set up the state used by the cgroup v2 validation checks.

    :param cgroup_parent: name of the cgroup parent directory to validate.
        When omitted or empty, "gpdb.service" is used.
    """
    self.mount_point = self.detect_cgroup_mount_point()
    # Maps the single-letter permission codes to the os.access() mode flags.
    self.tab = {"r": os.R_OK, "w": os.W_OK, "x": os.X_OK, "f": os.F_OK}
    # The parent name is supplied by the caller; no database lookup happens
    # here, so the value is never fetched only to be overwritten.
    self.cgroup_parent = cgroup_parent if cgroup_parent else "gpdb.service"

def validate_all(self):
"""
Expand Down Expand Up @@ -71,29 +64,6 @@ class CgroupValidationVersionTwo(CgroupValidation):

self.validate_permission(self.cgroup_parent + "/io.max", "rw")

def get_cgroup_parent(self):
    """
    Return the cgroup parent directory name configured in the database.

    Runs ``SHOW gp_resource_group_cgroup_parent`` over a utility-mode
    connection built from the default ``dbconn.DbURL()``.

    Failures are reported through ``self.die``, which raises
    ValidationException: once if connecting or querying fails, and once if
    the parameter comes back empty.
    """
    result = None
    try:
        dburl = dbconn.DbURL()

        with dbconn.connect(dburl, utility=True) as conn:
            # Query the GUC parameter value
            sql = "SHOW gp_resource_group_cgroup_parent"
            cursor = dbconn.query(conn, sql)
            result = cursor.fetchone()

    except Exception as e:
        self.die("failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e)))

    # Checked outside the try block on purpose: die() raises, and inside the
    # try its exception would be caught by the handler above and reported a
    # second time with a doubled message prefix.
    if result and result[0]:
        return result[0]

    self.die("failed to retrieve gp_resource_group_cgroup_parent parameter from database")

def die(self, msg):
    """Abort validation by raising ValidationException with a prefixed reason."""
    reason = "cgroup is not properly configured: {}".format(msg)
    raise ValidationException(reason)

Expand All @@ -118,7 +88,15 @@ class CgroupValidationVersionTwo(CgroupValidation):


if __name__ == '__main__':
    # The cgroup parent name is passed in by the caller on the command line;
    # when it is not given, the validator falls back to its own default.
    parser = argparse.ArgumentParser(description='Validate cgroup v2 configuration for resource groups')
    parser.add_argument('--cgroup-parent',
                        dest='cgroup_parent',
                        default=None,
                        help='The cgroup parent directory name (gp_resource_group_cgroup_parent value)')

    args = parser.parse_args()

    try:
        # Validate exactly once, with the parent name that was requested.
        CgroupValidationVersionTwo(cgroup_parent=args.cgroup_parent).validate_all()
    except ValidationException as e:
        # sys.exit with a string prints it to stderr and exits with status 1.
        # Unlike the bare exit(), it does not depend on the site module.
        sys.exit(e.message)
30 changes: 29 additions & 1 deletion gpMgmt/bin/gppylib/gpresgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from gppylib.commands.gp import *
from gppylib.gparray import GpArray
from gppylib.gplog import get_default_logger
from gppylib.db import dbconn


class GpResGroup(object):
Expand Down Expand Up @@ -40,13 +41,40 @@ def validate():

@staticmethod
def validate_v2():
"""
Validate cgroup v2 configuration on all hosts.

This method:
1. Connects to the master database to retrieve gp_resource_group_cgroup_parent
2. Passes this value to gpcheckresgroupv2impl on each host via command line
3. Each host validates its local cgroup filesystem permissions
"""
pool = base.WorkerPool()
gp_array = GpArray.initFromCatalog(dbconn.DbURL(), utility=True)
host_list = list(set(gp_array.get_hostlist(True)))
msg = None

# Get cgroup_parent value from master database
cgroup_parent = None
try:
# Connect to master database to get the GUC parameter
master_dburl = dbconn.DbURL()
with dbconn.connect(master_dburl, utility=True) as conn:
sql = "SHOW gp_resource_group_cgroup_parent"
cursor = dbconn.query(conn, sql)
result = cursor.fetchone()
if result and result[0]:
cgroup_parent = result[0]
else:
return "failed to retrieve gp_resource_group_cgroup_parent parameter from master database"
except Exception as e:
return "failed to retrieve gp_resource_group_cgroup_parent parameter: {}".format(str(e))

# Build command with cgroup_parent parameter
cmd_str = "gpcheckresgroupv2impl --cgroup-parent '{}'".format(cgroup_parent)

for h in host_list:
cmd = Command(h, "gpcheckresgroupv2impl", REMOTE, h)
cmd = Command(h, cmd_str, REMOTE, h)
pool.addCommand(cmd)
pool.join()

Expand Down
Loading