-
Notifications
You must be signed in to change notification settings - Fork 118
CNF-20404: DRA: disable Kubelet resources and topology managers #1445
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d94517a
ff6ed3b
2bd7267
5c318e1
ac51808
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ package kubeletconfig | |
|
|
||
| import ( | ||
| "encoding/json" | ||
| "fmt" | ||
| "time" | ||
|
|
||
| corev1 "k8s.io/api/core/v1" | ||
|
|
@@ -43,8 +44,12 @@ const ( | |
| evictionHardNodefsInodesFree = "nodefs.inodesFree" | ||
| ) | ||
|
|
||
| // New returns new KubeletConfig object for performance sensetive workflows | ||
| // New returns new KubeletConfig object for performance sensitive workflows | ||
| func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConfigOptions) (*machineconfigv1.KubeletConfig, error) { | ||
| if err := validateOptions(opts); err != nil { | ||
| return nil, fmt.Errorf("KubeletConfig options validation failed: %w", err) | ||
| } | ||
|
|
||
| name := components.GetComponentName(profile.Name, components.ComponentNamePrefix) | ||
| kubeletConfig := &kubeletconfigv1beta1.KubeletConfiguration{} | ||
| if v, ok := profile.Annotations[experimentalKubeletSnippetAnnotation]; ok { | ||
|
|
@@ -58,6 +63,61 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf | |
| Kind: "KubeletConfiguration", | ||
| } | ||
|
|
||
| // When DRA resource management is enabled, the kubelet CPU, memory and topology managers must be disabled, | ||
| // because the DRA plugin takes over resource allocation. | ||
| // If these kubelet managers were left enabled, they would conflict with the DRA plugin. | ||
| if opts.DRAResourceManagement { | ||
| if err := setKubeletConfigForDRAManagement(kubeletConfig, opts); err != nil { | ||
| return nil, err | ||
| } | ||
| } else { | ||
| if err := setKubeletConfigForCPUAndMemoryManagers(profile, kubeletConfig, opts); err != nil { | ||
| return nil, err | ||
| } | ||
| } | ||
|
|
||
| raw, err := json.Marshal(kubeletConfig) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| return &machineconfigv1.KubeletConfig{ | ||
| TypeMeta: metav1.TypeMeta{ | ||
| APIVersion: machineconfigv1.GroupVersion.String(), | ||
| Kind: "KubeletConfig", | ||
| }, | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Name: name, | ||
| }, | ||
| Spec: machineconfigv1.KubeletConfigSpec{ | ||
| MachineConfigPoolSelector: &metav1.LabelSelector{ | ||
| MatchLabels: opts.MachineConfigPoolSelector, | ||
| }, | ||
| KubeletConfig: &runtime.RawExtension{ | ||
| Raw: raw, | ||
| }, | ||
| }, | ||
| }, nil | ||
| } | ||
|
|
||
| func addStringToQuantity(q *resource.Quantity, value string) error { | ||
| v, err := resource.ParseQuantity(value) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| q.Add(v) | ||
| return nil | ||
| } | ||
|
|
||
| func setKubeletConfigForDRAManagement(kubeletConfig *kubeletconfigv1beta1.KubeletConfiguration, opts *components.KubeletConfigOptions) error { | ||
| kubeletConfig.CPUManagerPolicy = "none" | ||
| kubeletConfig.CPUManagerPolicyOptions = map[string]string{} | ||
| kubeletConfig.TopologyManagerPolicy = kubeletconfigv1beta1.NoneTopologyManagerPolicy | ||
| kubeletConfig.MemoryManagerPolicy = kubeletconfigv1beta1.NoneMemoryManagerPolicy | ||
| return nil | ||
| } | ||
|
|
||
| func setKubeletConfigForCPUAndMemoryManagers(profile *performancev2.PerformanceProfile, kubeletConfig *kubeletconfigv1beta1.KubeletConfiguration, opts *components.KubeletConfigOptions) error { | ||
| kubeletConfig.CPUManagerPolicy = cpuManagerPolicyStatic | ||
| kubeletConfig.CPUManagerReconcilePeriod = metav1.Duration{Duration: 5 * time.Second} | ||
| kubeletConfig.TopologyManagerPolicy = kubeletconfigv1beta1.BestEffortTopologyManagerPolicy | ||
|
|
@@ -102,11 +162,11 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf | |
| if opts.MixedCPUsEnabled { | ||
| sharedCPUs, err := cpuset.Parse(string(*profile.Spec.CPU.Shared)) | ||
| if err != nil { | ||
| return nil, err | ||
| return err | ||
| } | ||
| reservedCPUs, err := cpuset.Parse(string(*profile.Spec.CPU.Reserved)) | ||
| if err != nil { | ||
| return nil, err | ||
| return err | ||
| } | ||
| kubeletConfig.ReservedSystemCPUs = reservedCPUs.Union(sharedCPUs).String() | ||
| } | ||
|
|
@@ -125,13 +185,13 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf | |
| if kubeletConfig.ReservedMemory == nil { | ||
| reservedMemory := resource.NewQuantity(0, resource.DecimalSI) | ||
| if err := addStringToQuantity(reservedMemory, kubeletConfig.KubeReserved[string(corev1.ResourceMemory)]); err != nil { | ||
| return nil, err | ||
| return err | ||
| } | ||
| if err := addStringToQuantity(reservedMemory, kubeletConfig.SystemReserved[string(corev1.ResourceMemory)]); err != nil { | ||
| return nil, err | ||
| return err | ||
| } | ||
| if err := addStringToQuantity(reservedMemory, kubeletConfig.EvictionHard[evictionHardMemoryAvailable]); err != nil { | ||
| return nil, err | ||
| return err | ||
| } | ||
|
|
||
| kubeletConfig.ReservedMemory = []kubeletconfigv1beta1.MemoryReservation{ | ||
|
|
@@ -159,37 +219,16 @@ func New(profile *performancev2.PerformanceProfile, opts *components.KubeletConf | |
| } | ||
| } | ||
| } | ||
|
|
||
| raw, err := json.Marshal(kubeletConfig) | ||
| if err != nil { | ||
| return nil, err | ||
| } | ||
|
|
||
| return &machineconfigv1.KubeletConfig{ | ||
| TypeMeta: metav1.TypeMeta{ | ||
| APIVersion: machineconfigv1.GroupVersion.String(), | ||
| Kind: "KubeletConfig", | ||
| }, | ||
| ObjectMeta: metav1.ObjectMeta{ | ||
| Name: name, | ||
| }, | ||
| Spec: machineconfigv1.KubeletConfigSpec{ | ||
| MachineConfigPoolSelector: &metav1.LabelSelector{ | ||
| MatchLabels: opts.MachineConfigPoolSelector, | ||
| }, | ||
| KubeletConfig: &runtime.RawExtension{ | ||
| Raw: raw, | ||
| }, | ||
| }, | ||
| }, nil | ||
| return nil | ||
| } | ||
| func validateOptions(opts *components.KubeletConfigOptions) error { | ||
| if opts == nil { | ||
| return nil | ||
| } | ||
|
|
||
| func addStringToQuantity(q *resource.Quantity, value string) error { | ||
| v, err := resource.ParseQuantity(value) | ||
| if err != nil { | ||
| return err | ||
| if opts.MixedCPUsEnabled && opts.DRAResourceManagement { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This makes sense, although I would hope that resource management through DRA would supersede the mixed CPU feature that we have. But for now, let's make sure this is documented in this repo or by liaising with the docs team.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, let's add a test to validate that the operator returns an error when both are enabled.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Theoretically that's correct, but at this point in time we do not have a DRA plugin that can provide this kind of functionality, and since MixedCPUsEnabled depends on CPUManager behavior (which gets disabled when DRA is ON), the two cannot co-exist.
Thanks, I'll add it. |
||
| return fmt.Errorf("invalid configuration: mixed CPUs mode and DRA resource management features are mutually exclusive. please disable one of the features before continuing") | ||
| } | ||
| q.Add(v) | ||
|
|
||
| return nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the introduction of this new annotation in the PerformanceProfile API (along with its expected behavior) be documented somewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was added as an annotation since we don't want this to be part of the official API.
At this point in time we mainly want this for experimental usage.