diff --git a/api/v1alpha1/sentinelrunjob_types.go b/api/v1alpha1/sentinelrunjob_types.go new file mode 100644 index 000000000..7a49d20cc --- /dev/null +++ b/api/v1alpha1/sentinelrunjob_types.go @@ -0,0 +1,54 @@ +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:resource:scope=Namespaced +//+kubebuilder:printcolumn:name="ID",type="string",JSONPath=".status.id",description="ID of the SentinelRunJob in the Console API." +//+kubebuilder:printcolumn:name="JobStatus",type="string",JSONPath=".status.jobStatus",description="Status of the Job created by this SentinelRunJob." + +// SentinelRunJob is the Schema for the sentinel run job +type SentinelRunJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec SentinelRunJobSpec `json:"spec,omitempty"` + Status SentinelRunJobStatus `json:"status,omitempty"` +} + +type SentinelRunJobSpec struct { + // RunID from Console API + RunID string `json:"runId"` +} + +type SentinelRunJobStatus struct { + Status `json:",inline"` + + // JobRef Reference to the created Job + JobRef *v1.LocalObjectReference `json:"jobRef,omitempty"` + + // JobStatus is the status of the Job. 
+ JobStatus string `json:"jobStatus,omitempty"` +} + +//+kubebuilder:object:root=true + +// SentinelRunJobList contains a list of SentinelRunJob +type SentinelRunJobList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []SentinelRunJob `json:"items"` +} + +func (s *SentinelRunJob) SetCondition(condition metav1.Condition) { + meta.SetStatusCondition(&s.Status.Conditions, condition) +} + +func init() { + SchemeBuilder.Register(&SentinelRunJob{}, &SentinelRunJobList{}) +} diff --git a/api/v1alpha1/stackrunjob_types.go b/api/v1alpha1/stackrunjob_types.go new file mode 100644 index 000000000..fb533268a --- /dev/null +++ b/api/v1alpha1/stackrunjob_types.go @@ -0,0 +1,54 @@ +package v1alpha1 + +import ( + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:resource:scope=Namespaced +//+kubebuilder:printcolumn:name="ID",type="string",JSONPath=".status.id",description="ID of the StackRun in the Console API." +//+kubebuilder:printcolumn:name="JobStatus",type="string",JSONPath=".status.jobStatus",description="Status of the Job created by this StackRunJob." + +// StackRunJob is the Schema for the stack run job +type StackRunJob struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec StackRunJobSpec `json:"spec,omitempty"` + Status StackRunJobStatus `json:"status,omitempty"` +} + +type StackRunJobSpec struct { + // RunID from Console API + RunID string `json:"runId"` +} + +type StackRunJobStatus struct { + Status `json:",inline"` + + // JobRef Reference to the created Job + JobRef *v1.LocalObjectReference `json:"jobRef,omitempty"` + + // JobStatus is the status of the Job. 
+ JobStatus string `json:"jobStatus,omitempty"` +} + +//+kubebuilder:object:root=true + +// StackRunJobList contains a list of StackRunJob +type StackRunJobList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []StackRunJob `json:"items"` +} + +func (s *StackRunJob) SetCondition(condition metav1.Condition) { + meta.SetStatusCondition(&s.Status.Conditions, condition) +} + +func init() { + SchemeBuilder.Register(&StackRunJob{}, &StackRunJobList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 8d7c3a4d7..b9b2737ce 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1673,6 +1673,196 @@ func (in *RecommendationsSettings) DeepCopy() *RecommendationsSettings { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SentinelRunJob) DeepCopyInto(out *SentinelRunJob) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SentinelRunJob. +func (in *SentinelRunJob) DeepCopy() *SentinelRunJob { + if in == nil { + return nil + } + out := new(SentinelRunJob) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SentinelRunJob) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *SentinelRunJobList) DeepCopyInto(out *SentinelRunJobList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]SentinelRunJob, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SentinelRunJobList. +func (in *SentinelRunJobList) DeepCopy() *SentinelRunJobList { + if in == nil { + return nil + } + out := new(SentinelRunJobList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *SentinelRunJobList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SentinelRunJobSpec) DeepCopyInto(out *SentinelRunJobSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SentinelRunJobSpec. +func (in *SentinelRunJobSpec) DeepCopy() *SentinelRunJobSpec { + if in == nil { + return nil + } + out := new(SentinelRunJobSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SentinelRunJobStatus) DeepCopyInto(out *SentinelRunJobStatus) { + *out = *in + in.Status.DeepCopyInto(&out.Status) + if in.JobRef != nil { + in, out := &in.JobRef, &out.JobRef + *out = new(v1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SentinelRunJobStatus. 
+func (in *SentinelRunJobStatus) DeepCopy() *SentinelRunJobStatus { + if in == nil { + return nil + } + out := new(SentinelRunJobStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StackRunJob) DeepCopyInto(out *StackRunJob) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StackRunJob. +func (in *StackRunJob) DeepCopy() *StackRunJob { + if in == nil { + return nil + } + out := new(StackRunJob) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *StackRunJob) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StackRunJobList) DeepCopyInto(out *StackRunJobList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]StackRunJob, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StackRunJobList. +func (in *StackRunJobList) DeepCopy() *StackRunJobList { + if in == nil { + return nil + } + out := new(StackRunJobList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *StackRunJobList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StackRunJobSpec) DeepCopyInto(out *StackRunJobSpec) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StackRunJobSpec. +func (in *StackRunJobSpec) DeepCopy() *StackRunJobSpec { + if in == nil { + return nil + } + out := new(StackRunJobSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *StackRunJobStatus) DeepCopyInto(out *StackRunJobStatus) { + *out = *in + in.Status.DeepCopyInto(&out.Status) + if in.JobRef != nil { + in, out := &in.JobRef, &out.JobRef + *out = new(v1.LocalObjectReference) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StackRunJobStatus. +func (in *StackRunJobStatus) DeepCopy() *StackRunJobStatus { + if in == nil { + return nil + } + out := new(StackRunJobStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Status) DeepCopyInto(out *Status) { *out = *in diff --git a/charts/deployment-operator/crds/deployments.plural.sh_sentinelrunjobs.yaml b/charts/deployment-operator/crds/deployments.plural.sh_sentinelrunjobs.yaml new file mode 100644 index 000000000..5a2fb721e --- /dev/null +++ b/charts/deployment-operator/crds/deployments.plural.sh_sentinelrunjobs.yaml @@ -0,0 +1,147 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: sentinelrunjobs.deployments.plural.sh +spec: + group: deployments.plural.sh + names: + kind: SentinelRunJob + listKind: SentinelRunJobList + plural: sentinelrunjobs + singular: sentinelrunjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: ID of the SentinelRunJob in the Console API. + jsonPath: .status.id + name: ID + type: string + - description: Status of the Job created by this SentinelRunJob. + jsonPath: .status.jobStatus + name: JobStatus + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: SentinelRunJob is the Schema for the sentinel run job + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + runId: + description: RunID from Console API + type: string + required: + - runId + type: object + status: + properties: + conditions: + description: Represents the observations of a PrAutomation's current + state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + id: + description: ID of the resource in the Console API. + type: string + jobRef: + description: JobRef Reference to the created Job + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + jobStatus: + description: JobStatus is the status of the Job. + type: string + sha: + description: SHA of last applied configuration. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/charts/deployment-operator/crds/deployments.plural.sh_stackrunjobs.yaml b/charts/deployment-operator/crds/deployments.plural.sh_stackrunjobs.yaml new file mode 100644 index 000000000..5a1f82902 --- /dev/null +++ b/charts/deployment-operator/crds/deployments.plural.sh_stackrunjobs.yaml @@ -0,0 +1,147 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: stackrunjobs.deployments.plural.sh +spec: + group: deployments.plural.sh + names: + kind: StackRunJob + listKind: StackRunJobList + plural: stackrunjobs + singular: stackrunjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: ID of the StackRun in the Console API. + jsonPath: .status.id + name: ID + type: string + - description: Status of the Job created by this StackRunJob. + jsonPath: .status.jobStatus + name: JobStatus + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: StackRunJob is the Schema for the stack run job + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + runId: + description: RunID from Console API + type: string + required: + - runId + type: object + status: + properties: + conditions: + description: Represents the observations of a PrAutomation's current + state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + id: + description: ID of the resource in the Console API. + type: string + jobRef: + description: JobRef Reference to the created Job + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + jobStatus: + description: JobStatus is the status of the Job. + type: string + sha: + description: SHA of last applied configuration. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/cmd/agent/kubernetes.go b/cmd/agent/kubernetes.go index 721be222d..8d672ea7f 100644 --- a/cmd/agent/kubernetes.go +++ b/cmd/agent/kubernetes.go @@ -226,6 +226,8 @@ func registerKubeReconcilersOrDie( Client: manager.GetClient(), Scheme: manager.GetScheme(), ConsoleClient: extConsoleClient, + ConsoleURL: consoleURL, + DeployToken: deployToken, }).SetupWithManager(manager); err != nil { setupLog.Error(err, "unable to create controller", "controller", "StackRun") } @@ -327,4 +329,13 @@ func registerKubeReconcilersOrDie( }).SetupWithManager(manager); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PluralCAPIClusterController") } + if err := (&controller.SentinelRunJobReconciler{ + Client: manager.GetClient(), + Scheme: manager.GetScheme(), + ConsoleClient: extConsoleClient, + ConsoleURL: consoleURL, + DeployToken: deployToken, + }).SetupWithManager(manager); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "SentinelRunJob") + } } diff --git a/config/crd/bases/deployments.plural.sh_sentinelrunjobs.yaml b/config/crd/bases/deployments.plural.sh_sentinelrunjobs.yaml new file mode 100644 index 000000000..5a2fb721e --- /dev/null +++ b/config/crd/bases/deployments.plural.sh_sentinelrunjobs.yaml @@ -0,0 +1,147 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: sentinelrunjobs.deployments.plural.sh +spec: + group: deployments.plural.sh + names: + kind: SentinelRunJob + listKind: SentinelRunJobList + plural: sentinelrunjobs + singular: sentinelrunjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: ID of the SentinelRunJob in the Console API. + jsonPath: .status.id + name: ID + type: string + - description: Status of the Job created by this SentinelRunJob. 
+ jsonPath: .status.jobStatus + name: JobStatus + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: SentinelRunJob is the Schema for the sentinel run job + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + runId: + description: RunID from Console API + type: string + required: + - runId + type: object + status: + properties: + conditions: + description: Represents the observations of a PrAutomation's current + state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + id: + description: ID of the resource in the Console API. + type: string + jobRef: + description: JobRef Reference to the created Job + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + jobStatus: + description: JobStatus is the status of the Job. + type: string + sha: + description: SHA of last applied configuration. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/deployments.plural.sh_stackrunjobs.yaml b/config/crd/bases/deployments.plural.sh_stackrunjobs.yaml new file mode 100644 index 000000000..5a1f82902 --- /dev/null +++ b/config/crd/bases/deployments.plural.sh_stackrunjobs.yaml @@ -0,0 +1,147 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: stackrunjobs.deployments.plural.sh +spec: + group: deployments.plural.sh + names: + kind: StackRunJob + listKind: StackRunJobList + plural: stackrunjobs + singular: stackrunjob + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: ID of the StackRun in the Console API. + jsonPath: .status.id + name: ID + type: string + - description: Status of the Job created by this StackRunJob. + jsonPath: .status.jobStatus + name: JobStatus + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: StackRunJob is the Schema for the stack run job + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + properties: + runId: + description: RunID from Console API + type: string + required: + - runId + type: object + status: + properties: + conditions: + description: Represents the observations of a PrAutomation's current + state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. 
+ enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + id: + description: ID of the resource in the Console API. + type: string + jobRef: + description: JobRef Reference to the created Job + properties: + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + type: object + x-kubernetes-map-type: atomic + jobStatus: + description: JobStatus is the status of the Job. + type: string + sha: + description: SHA of last applied configuration. 
+ type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/internal/controller/sentinelrunjob_controller.go b/internal/controller/sentinelrunjob_controller.go new file mode 100644 index 000000000..b6544d2fe --- /dev/null +++ b/internal/controller/sentinelrunjob_controller.go @@ -0,0 +1,146 @@ +package controller + +import ( + "context" + + console "github.com/pluralsh/console/go/client" + "github.com/pluralsh/deployment-operator/api/v1alpha1" + internalerror "github.com/pluralsh/deployment-operator/internal/errors" + "github.com/pluralsh/deployment-operator/internal/utils" + consoleclient "github.com/pluralsh/deployment-operator/pkg/client" + "github.com/pluralsh/deployment-operator/pkg/common" + "github.com/samber/lo" + batchv1 "k8s.io/api/batch/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +const SentinelRunJobFinalizer = "deployments.plural.sh/sentinel-run-job-protection" + +type SentinelRunJobReconciler struct { + client.Client + ConsoleClient consoleclient.Client + Scheme *runtime.Scheme + ConsoleURL string + DeployToken string +} + +func (r *SentinelRunJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ reconcile.Result, retErr error) { + fromContext := log.FromContext(ctx) + fromContext.Info("Reconciling SentinelRunJob", "name", req.Name, "namespace", req.Namespace) + + srj := &v1alpha1.SentinelRunJob{} + if err := r.Get(ctx, req.NamespacedName, srj); err != nil { + return ctrl.Result{}, 
client.IgnoreNotFound(err) + } + + scope, err := NewDefaultScope(ctx, r.Client, srj) + if err != nil { + return ctrl.Result{}, err + } + + // Always patch object when exiting this function, so we can persist any object changes. + defer func() { + if err := scope.PatchObject(); err != nil && retErr == nil { + retErr = err + } + }() + utils.MarkCondition(srj.SetCondition, v1alpha1.ReadyConditionType, metav1.ConditionFalse, v1alpha1.ReadyConditionReason, "") + utils.MarkCondition(srj.SetCondition, v1alpha1.SynchronizedConditionType, metav1.ConditionFalse, v1alpha1.SynchronizedConditionReason, "") + + // Finalizer is needed to ensure that the Job and Secret are cleaned up after the StackRun reaches terminal state and will be deleted by the controller. + // The object can be deleted before defer patches the status update with terminal state, so we need to ensure that the finalizer is removed and the object is deleted to avoid orphaned resources. + if srj.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(srj, SentinelRunJobFinalizer) { + controllerutil.AddFinalizer(srj, SentinelRunJobFinalizer) + } + if !srj.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(srj, SentinelRunJobFinalizer) { + controllerutil.RemoveFinalizer(srj, SentinelRunJobFinalizer) + return ctrl.Result{}, nil + } + + run, err := r.ConsoleClient.GetSentinelRunJob(srj.Spec.RunID) + if err != nil { + if !internalerror.IsNotFound(err) { + return ctrl.Result{}, err + } + return jitterRequeue(requeueAfter, jitter), nil + } + srj.Status.ID = lo.ToPtr(run.ID) + + secret, err := r.reconcileRunSecret(ctx, req.Name, req.Namespace, srj.Spec.RunID, string(run.Format)) + if err != nil { + return ctrl.Result{}, err + } + + job, err := r.reconcileRunJob(ctx, srj, run) + if err != nil { + return ctrl.Result{}, err + } + + if err := utils.TryAddOwnerRef(ctx, r.Client, job, secret, r.Scheme); err != nil { + return ctrl.Result{}, err + } + if err := utils.TryAddControllerRef(ctx, r.Client, srj, 
job, r.Scheme); err != nil { + return ctrl.Result{}, err + } + + unstructuredJob, err := common.ToUnstructured(job) + if err != nil { + return ctrl.Result{}, err + } + health, err := common.GetResourceHealth(unstructuredJob) + if err != nil { + return ctrl.Result{}, err + } + + status := run.Status + if health != nil { + srj.Status.JobStatus = string(health.Status) + if health.Status == common.HealthStatusDegraded { + status = console.SentinelRunJobStatusFailed + } + } + + if err := r.ConsoleClient.UpdateSentinelRunJobStatus(srj.Spec.RunID, &console.SentinelRunJobUpdateAttributes{ + Status: &status, + Reference: &console.NamespacedName{ + Name: job.Name, + Namespace: job.Namespace, + }, + }); err != nil { + return ctrl.Result{}, err + } + + if isTerminalSentinelRunStatus(status) { + if err := r.Delete(ctx, srj); err != nil && !apierrs.IsNotFound(err) { + return ctrl.Result{}, err + } + } + + utils.MarkCondition(srj.SetCondition, v1alpha1.ReadyConditionType, metav1.ConditionTrue, v1alpha1.ReadyConditionReason, "") + utils.MarkCondition(srj.SetCondition, v1alpha1.SynchronizedConditionType, metav1.ConditionTrue, v1alpha1.SynchronizedConditionReason, "") + return ctrl.Result{}, nil +} + +// isTerminalSentinelRunStatus returns true when the given SentinelRunJobStatus is in a terminal state, meaning the job has completed and will not transition to any other state. +func isTerminalSentinelRunStatus(status console.SentinelRunJobStatus) bool { + return status == console.SentinelRunJobStatusSuccess +} + +// SetupWithManager configures the controller with the manager. +func (r *SentinelRunJobReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + WithOptions(controller.Options{MaxConcurrentReconciles: 1}). + For(&v1alpha1.SentinelRunJob{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + Owns(&batchv1.Job{}). 
+ Complete(r) +} diff --git a/pkg/controller/sentinel/job.go b/internal/controller/sentinelrunjob_job.go similarity index 50% rename from pkg/controller/sentinel/job.go rename to internal/controller/sentinelrunjob_job.go index b9a6c8b7f..e9fac9d69 100644 --- a/pkg/controller/sentinel/job.go +++ b/internal/controller/sentinelrunjob_job.go @@ -1,39 +1,38 @@ -package sentinel +package controller import ( "context" "fmt" "os" - "k8s.io/apimachinery/pkg/api/resource" - - "github.com/pluralsh/deployment-operator/pkg/common" - console "github.com/pluralsh/console/go/client" + "github.com/pluralsh/deployment-operator/api/v1alpha1" + "github.com/pluralsh/deployment-operator/pkg/common" "github.com/pluralsh/polly/algorithms" "github.com/samber/lo" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/log" - - "github.com/pluralsh/deployment-operator/internal/utils" - consoleclient "github.com/pluralsh/deployment-operator/pkg/client" ) +func init() { + imageTag := os.Getenv("IMAGE_TAG") + if len(imageTag) > 0 { + defaultImageTag = imageTag + } +} + const ( - podDefaultContainerAnnotation = "kubectl.kubernetes.io/default-container" - jobSelector = "sentinelrun.deployments.plural.sh" - DefaultJobContainer = "default" - defaultJobVolumeName = "default" - defaultJobVolumePath = "/plural" - defaultJobTmpVolumeName = "default-tmp" - defaultJobTmpVolumePath = "/tmp" - nonRootUID = int64(65532) - nonRootGID = nonRootUID - defaultContainerImage = "ghcr.io/pluralsh/sentinel-harness" + sentinelRunJobSelector = "sentinelrun.deployments.plural.sh" + defaultJobContainer = "default" + sentinelRunJobDefaultContainerImage = "ghcr.io/pluralsh/sentinel-harness" + defaultJobVolumeName = "default" + defaultJobVolumePath = "/plural" + defaultJobTmpVolumeName = "default-tmp" + 
defaultJobTmpVolumePath = "/tmp" ) var ( @@ -64,107 +63,33 @@ var ( } ) -func init() { - imageTag := os.Getenv("IMAGE_TAG") - if len(imageTag) > 0 { - defaultImageTag = imageTag - } -} - -func (r *SentinelReconciler) reconcileRunJob(ctx context.Context, run *console.SentinelRunJobFragment) (*batchv1.Job, error) { - logger := log.FromContext(ctx) - - name := GetRunResourceName(run) - jobSpec := getRunJobSpec(name, run.JobSpec) - namespace := r.GetRunResourceNamespace(jobSpec) - - if err := r.namespaceCache.EnsureNamespace(ctx, namespace, &console.ServiceDeploymentForAgent_SyncConfig{ - CreateNamespace: lo.ToPtr(true), - }); err != nil { - return nil, err - } - +func (r *SentinelRunJobReconciler) reconcileRunJob(ctx context.Context, srj *v1alpha1.SentinelRunJob, run *console.SentinelRunJobFragment) (*batchv1.Job, error) { foundJob := &batchv1.Job{} - if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, foundJob); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: srj.Name, Namespace: srj.Namespace}, foundJob); err != nil { if !apierrs.IsNotFound(err) { return nil, err } - secret, err := r.upsertRunSecret(ctx, name, namespace, run.ID, run.Format.String()) - if err != nil { - return nil, err - } - - job, err := r.GenerateRunJob(run, jobSpec, name, namespace) + jobSpec := common.GetRunJobSpec(srj.Name, run.JobSpec) + job, err := r.GenerateRunJob(run, jobSpec, srj.Name, srj.Namespace) if err != nil { return nil, err } - logger.V(2).Info("creating job for sentinel run", "id", run.ID, "namespace", job.Namespace, "name", job.Name) - if err := r.k8sClient.Create(ctx, job); err != nil { - logger.Error(err, "unable to create job") + if err := r.Create(ctx, job); err != nil { return nil, err } - - if err := r.consoleClient.UpdateSentinelRunJobStatus(run.ID, &console.SentinelRunJobUpdateAttributes{ - Status: lo.ToPtr(run.Status), - Reference: &console.NamespacedName{ - Name: job.Name, - Namespace: job.Namespace, - }, - }); err != nil 
{ - return nil, err - } - - if err := utils.TryAddOwnerRef(ctx, r.k8sClient, job, secret, r.scheme); err != nil { - logger.Error(err, "error setting owner reference for job secret") - return nil, err - } - return job, nil } - unstructuredJob, err := common.ToUnstructured(foundJob) - if err != nil { - return nil, err - } - health, err := common.GetResourceHealth(unstructuredJob) - if err != nil { - return nil, err - } - if health != nil && health.Status == common.HealthStatusDegraded { - if err := r.consoleClient.UpdateSentinelRunJobStatus(run.ID, &console.SentinelRunJobUpdateAttributes{ - Status: lo.ToPtr(console.SentinelRunJobStatusFailed), - Reference: &console.NamespacedName{ - Name: foundJob.Name, - Namespace: foundJob.Namespace, - }, - }); err != nil { - return nil, err - } - } - return foundJob, nil } // GetRunResourceName returns a resource name used for a job and a secret connected to a given run. -func GetRunResourceName(run *console.SentinelRunJobFragment) string { +func (r *SentinelRunJobReconciler) GetRunResourceName(run *console.SentinelRunJobFragment) string { return fmt.Sprintf("sentinel-%s", run.ID) } -// GetRunResourceNamespace returns a resource namespace used for a job and a secret connected to a given run. -func (r *SentinelReconciler) GetRunResourceNamespace(jobSpec *batchv1.JobSpec) (namespace string) { - if jobSpec != nil { - namespace = jobSpec.Template.Namespace - } - - if namespace == "" { - namespace = r.namespace - } - - return -} - -func (r *SentinelReconciler) GenerateRunJob(run *console.SentinelRunJobFragment, jobSpec *batchv1.JobSpec, name, namespace string) (*batchv1.Job, error) { +func (r *SentinelRunJobReconciler) GenerateRunJob(run *console.SentinelRunJobFragment, jobSpec *batchv1.JobSpec, name, namespace string) (*batchv1.Job, error) { var err error // If user-defined job spec was not available initialize it here. 
if jobSpec == nil { @@ -178,7 +103,7 @@ func (r *SentinelReconciler) GenerateRunJob(run *console.SentinelRunJobFragment, if jobSpec.Template.Annotations == nil { jobSpec.Template.Annotations = map[string]string{} } - jobSpec.Template.Annotations[podDefaultContainerAnnotation] = DefaultJobContainer + jobSpec.Template.Annotations[podDefaultContainerAnnotation] = defaultJobContainer jobSpec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever @@ -192,56 +117,22 @@ func (r *SentinelReconciler) GenerateRunJob(run *console.SentinelRunJobFragment, return nil, err } - jobSpec.Template.Spec.Volumes = ensureDefaultVolumes(jobSpec.Template.Spec.Volumes) + jobSpec.Template.Spec.Volumes = r.ensureDefaultVolumes(jobSpec.Template.Spec.Volumes) - jobSpec.Template.Spec.SecurityContext = ensureDefaultPodSecurityContext(jobSpec.Template.Spec.SecurityContext) + jobSpec.Template.Spec.SecurityContext = r.ensureDefaultPodSecurityContext(jobSpec.Template.Spec.SecurityContext) return &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, - Annotations: map[string]string{jobSelector: name}, - Labels: map[string]string{jobSelector: name}, + Annotations: map[string]string{sentinelRunJobSelector: name}, + Labels: map[string]string{sentinelRunJobSelector: name}, }, Spec: *jobSpec, }, nil } -func getRunJobSpec(name string, jobSpecFragment *console.JobSpecFragment) *batchv1.JobSpec { - if jobSpecFragment == nil { - return nil - } - var jobSpec *batchv1.JobSpec - var err error - if jobSpecFragment.Raw != nil && *jobSpecFragment.Raw != "null" { - jobSpec, err = consoleclient.JobSpecFromYaml(*jobSpecFragment.Raw) - if err != nil { - return nil - } - } else { - jobSpec = &batchv1.JobSpec{ - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: jobSpecFragment.Namespace, - Labels: consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Labels), - Annotations: consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Annotations), - }, - 
Spec: corev1.PodSpec{ - Containers: consoleclient.ContainersFromContainerSpecFragments(name, jobSpecFragment.Containers, jobSpecFragment.Requests), - }, - }, - } - - if jobSpecFragment.ServiceAccount != nil { - jobSpec.Template.Spec.ServiceAccountName = *jobSpecFragment.ServiceAccount - } - } - - return jobSpec -} - -func (r *SentinelReconciler) ensureDefaultContainer( +func (r *SentinelRunJobReconciler) ensureDefaultContainer( containers []corev1.Container, run *console.SentinelRunJobFragment, ) []corev1.Container { @@ -249,7 +140,7 @@ func (r *SentinelReconciler) ensureDefaultContainer( if len(containers) > 0 { // optionally normalize the default container (if they used the default name) index := algorithms.Index(containers, func(c corev1.Container) bool { - return c.Name == DefaultJobContainer + return c.Name == defaultJobContainer }) if index != -1 { @@ -260,7 +151,7 @@ func (r *SentinelReconciler) ensureDefaultContainer( } for i := range containers { - containers[i].VolumeMounts = ensureDefaultVolumeMounts(containers[i].VolumeMounts) + containers[i].VolumeMounts = r.ensureDefaultVolumeMounts(containers[i].VolumeMounts) containers[i].Env = make([]corev1.EnvVar, 0) containers[i].EnvFrom = r.getDefaultContainerEnvFrom(run) } @@ -271,38 +162,37 @@ func (r *SentinelReconciler) ensureDefaultContainer( return []corev1.Container{r.getDefaultContainer(run)} } -func (r *SentinelReconciler) getDefaultContainer(run *console.SentinelRunJobFragment) corev1.Container { +func (r *SentinelRunJobReconciler) getDefaultContainer(run *console.SentinelRunJobFragment) corev1.Container { return corev1.Container{ - Name: DefaultJobContainer, + Name: defaultJobContainer, Image: r.getDefaultContainerImage(), VolumeMounts: []corev1.VolumeMount{ defaultJobContainerVolumeMount, defaultJobTmpContainerVolumeMount, }, - SecurityContext: ensureDefaultContainerSecurityContext(nil), + SecurityContext: r.ensureDefaultContainerSecurityContext(nil), Env: make([]corev1.EnvVar, 0), EnvFrom: 
r.getDefaultContainerEnvFrom(run), } } -func (r *SentinelReconciler) getDefaultContainerEnvFrom(run *console.SentinelRunJobFragment) []corev1.EnvFromSource { +func (r *SentinelRunJobReconciler) getDefaultContainerEnvFrom(run *console.SentinelRunJobFragment) []corev1.EnvFromSource { return []corev1.EnvFromSource{ { SecretRef: &corev1.SecretEnvSource{ LocalObjectReference: corev1.LocalObjectReference{ - Name: GetRunResourceName(run), + Name: r.GetRunResourceName(run), }, }, }, } } -func ensureDefaultVolumeMounts(mounts []corev1.VolumeMount) []corev1.VolumeMount { +func (r *SentinelRunJobReconciler) ensureDefaultVolumeMounts(mounts []corev1.VolumeMount) []corev1.VolumeMount { return append( algorithms.Filter(mounts, func(v corev1.VolumeMount) bool { switch v.Name { - case defaultJobVolumeName: - case defaultJobTmpVolumeName: + case defaultJobVolumeName, defaultJobTmpVolumeName: return false } @@ -313,14 +203,14 @@ func ensureDefaultVolumeMounts(mounts []corev1.VolumeMount) []corev1.VolumeMount ) } -func ensureDefaultVolumes(volumes []corev1.Volume) []corev1.Volume { +func (r *SentinelRunJobReconciler) ensureDefaultVolumes(volumes []corev1.Volume) []corev1.Volume { return append(volumes, defaultJobVolume, defaultJobTmpVolume, ) } -func ensureDefaultPodSecurityContext(psc *corev1.PodSecurityContext) *corev1.PodSecurityContext { +func (r *SentinelRunJobReconciler) ensureDefaultPodSecurityContext(psc *corev1.PodSecurityContext) *corev1.PodSecurityContext { if psc != nil { return psc } @@ -332,7 +222,7 @@ func ensureDefaultPodSecurityContext(psc *corev1.PodSecurityContext) *corev1.Pod } } -func ensureDefaultContainerSecurityContext(sc *corev1.SecurityContext) *corev1.SecurityContext { +func (r *SentinelRunJobReconciler) ensureDefaultContainerSecurityContext(sc *corev1.SecurityContext) *corev1.SecurityContext { if sc != nil { return sc } @@ -346,7 +236,7 @@ func ensureDefaultContainerSecurityContext(sc *corev1.SecurityContext) *corev1.S } } -func (r *SentinelReconciler) 
ensureDefaultContainerResourcesRequests(containers []corev1.Container, run *console.SentinelRunJobFragment) ([]corev1.Container, error) { +func (r *SentinelRunJobReconciler) ensureDefaultContainerResourcesRequests(containers []corev1.Container, run *console.SentinelRunJobFragment) ([]corev1.Container, error) { if run.JobSpec == nil || run.JobSpec.Requests == nil { return containers, nil } @@ -398,11 +288,7 @@ func (r *SentinelReconciler) ensureDefaultContainerResourcesRequests(containers return containers, nil } -func (r *SentinelReconciler) getDefaultContainerImage() string { +func (r *SentinelRunJobReconciler) getDefaultContainerImage() string { // Use default image with default tag (can be overridden by IMAGE_TAG env var) - return fmt.Sprintf("%s:%s", getDefaultContainerImage(), defaultImageTag) -} - -func getDefaultContainerImage() string { - return common.GetConfigurationManager().SwapBaseRegistry(defaultContainerImage) + return fmt.Sprintf("%s:%s", common.GetConfigurationManager().SwapBaseRegistry(sentinelRunJobDefaultContainerImage), defaultImageTag) } diff --git a/pkg/controller/sentinel/secret.go b/internal/controller/sentinelrunjob_secret.go similarity index 65% rename from pkg/controller/sentinel/secret.go rename to internal/controller/sentinelrunjob_secret.go index 7e53d49d4..4dfbc55b8 100644 --- a/pkg/controller/sentinel/secret.go +++ b/internal/controller/sentinelrunjob_secret.go @@ -1,4 +1,4 @@ -package sentinel +package controller import ( "context" @@ -17,28 +17,28 @@ const ( envFormat = "PLRL_OUTPUT_FORMAT" ) -func (r *SentinelReconciler) getRunSecretData(runID, format string) map[string]string { +func (r *SentinelRunJobReconciler) getRunSecretData(runID, format string) map[string]string { return map[string]string{ - envConsoleURL: r.consoleURL, - envConsoleToken: r.deployToken, + envConsoleURL: r.ConsoleURL, + envConsoleToken: r.DeployToken, envRunID: runID, envFormat: format, } } -func (r *SentinelReconciler) hasRunSecretData(data 
map[string][]byte, runID string) bool { +func (r *SentinelRunJobReconciler) hasRunSecretData(data map[string][]byte, runID string) bool { token, hasToken := data[envConsoleToken] url, hasUrl := data[envConsoleURL] - id, hasID := data[envConsoleURL] + id, hasID := data[envRunID] return hasToken && hasUrl && hasID && - string(token) == r.deployToken && string(url) == r.consoleURL && string(id) == runID + string(token) == r.DeployToken && string(url) == r.ConsoleURL && string(id) == runID } -func (r *SentinelReconciler) upsertRunSecret(ctx context.Context, name, namespace, runID, format string) (*corev1.Secret, error) { +func (r *SentinelRunJobReconciler) reconcileRunSecret(ctx context.Context, name, namespace, runID, format string) (*corev1.Secret, error) { logger := log.FromContext(ctx) secret := &corev1.Secret{} - if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret); err != nil { if !apierrs.IsNotFound(err) { return nil, err } @@ -48,7 +48,7 @@ func (r *SentinelReconciler) upsertRunSecret(ctx context.Context, name, namespac StringData: r.getRunSecretData(runID, format), } logger.V(2).Info("creating secret", "namespace", secret.Namespace, "name", secret.Name) - if err := r.k8sClient.Create(ctx, secret); err != nil { + if err := r.Create(ctx, secret); err != nil { logger.Error(err, "unable to create secret") return nil, err } @@ -59,7 +59,7 @@ func (r *SentinelReconciler) upsertRunSecret(ctx context.Context, name, namespac if !r.hasRunSecretData(secret.Data, runID) { logger.V(2).Info("updating secret", "namespace", secret.Namespace, "name", secret.Name) secret.StringData = r.getRunSecretData(runID, format) - if err := r.k8sClient.Update(ctx, secret); err != nil { + if err := r.Update(ctx, secret); err != nil { logger.Error(err, "unable to update secret") return nil, err } diff --git 
a/internal/controller/stackrunjob_controller.go b/internal/controller/stackrunjob_controller.go index 4f6aa4af3..71ee12624 100644 --- a/internal/controller/stackrunjob_controller.go +++ b/internal/controller/stackrunjob_controller.go @@ -19,14 +19,15 @@ package controller import ( "context" "fmt" - "strings" "time" console "github.com/pluralsh/console/go/client" + "github.com/pluralsh/deployment-operator/api/v1alpha1" clienterrors "github.com/pluralsh/deployment-operator/internal/errors" + "github.com/pluralsh/deployment-operator/internal/utils" "github.com/pluralsh/deployment-operator/pkg/client" - "github.com/pluralsh/deployment-operator/pkg/controller/stacks" "github.com/pluralsh/polly/algorithms" + "github.com/samber/lo" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" @@ -34,12 +35,17 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" k8sClient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) -const jobSelector = "stackrun.deployments.plural.sh" +const StackRunJobFinalizer = "deployments.plural.sh/stack-run-job-protection" + const jobTimeout = time.Minute * 40 const podTimeout = time.Minute * 2 @@ -48,67 +54,138 @@ type StackRunJobReconciler struct { k8sClient.Client Scheme *runtime.Scheme ConsoleClient client.Client + ConsoleURL string + DeployToken string } // Reconcile StackRun's Job ensure that Console stays in sync with Kubernetes cluster. 
-func (r *StackRunJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *StackRunJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ reconcile.Result, retErr error) { logger := log.FromContext(ctx) - // Read resource from Kubernetes cluster. - job := &batchv1.Job{} - if err := r.Get(ctx, req.NamespacedName, job); err != nil { - logger.Error(err, "unable to fetch job") + run := &v1alpha1.StackRunJob{} + if err := r.Get(ctx, req.NamespacedName, run); err != nil { + logger.Error(err, "unable to fetch StackRunJob") return ctrl.Result{}, k8sClient.IgnoreNotFound(err) } - stackRunID := getStackRunID(job) - stackRun, err := r.ConsoleClient.GetStackRun(stackRunID) + + scope, err := NewDefaultScope(ctx, r.Client, run) + if err != nil { + return ctrl.Result{}, err + } + + // Always patch object when exiting this function, so we can persist any object changes. + // Registered second so it runs first (before the delete defer above). + defer func() { + if err := scope.PatchObject(); err != nil && retErr == nil { + retErr = err + } + }() + + utils.MarkCondition(run.SetCondition, v1alpha1.ReadyConditionType, metav1.ConditionFalse, v1alpha1.ReadyConditionReason, "") + utils.MarkCondition(run.SetCondition, v1alpha1.SynchronizedConditionType, metav1.ConditionFalse, v1alpha1.SynchronizedConditionReason, "") + + // Finalizer is needed to ensure that the Job and Secret are cleaned up after the StackRun reaches terminal state and will be deleted by the controller. + // The object can be deleted before defer patches the status update with terminal state, so we need to ensure that the finalizer is removed and the object is deleted to avoid orphaned resources. 
+ if run.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(run, StackRunJobFinalizer) { + controllerutil.AddFinalizer(run, StackRunJobFinalizer) + } + if !run.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(run, StackRunJobFinalizer) { + controllerutil.RemoveFinalizer(run, StackRunJobFinalizer) + return ctrl.Result{}, nil + } + + stackRun, err := r.ConsoleClient.GetStackRun(run.Spec.RunID) if err != nil { if clienterrors.IsNotFound(err) { - return ctrl.Result{}, nil + return jitterRequeue(requeueAfter, jitter), nil } return ctrl.Result{}, err } + run.Status.ID = lo.ToPtr(stackRun.ID) + secret, err := r.reconcileSecret(ctx, run) + if err != nil { + return ctrl.Result{}, err + } + + job, err := r.reconcileJob(ctx, run, stackRun) + if err != nil { + return ctrl.Result{}, err + } + + run.Status.JobRef = &corev1.LocalObjectReference{Name: job.Name} + + if err := utils.TryAddOwnerRef(ctx, r.Client, job, secret, r.Scheme); err != nil { + return ctrl.Result{}, err + } + if err := utils.TryAddControllerRef(ctx, r.Client, run, job, r.Scheme); err != nil { + return ctrl.Result{}, err + } + + status := stackRun.Status + switch { // Exit if stack run is not in running state (run status already updated), // or if the job is still running (harness controls run status). 
- if stackRun.Status != console.StackStatusRunning || job.Status.CompletionTime.IsZero() { + case stackRun.Status != console.StackStatusRunning || job.Status.CompletionTime.IsZero(): if isActiveJobTimout(stackRun.Status, job) || r.isActiveJobPodFailed(ctx, stackRun.Status, job) { if err := r.killJob(ctx, job); err != nil { return ctrl.Result{}, err } + run.Status.JobStatus = string(console.StackStatusFailed) logger.V(2).Info("stack run job failed", "name", job.Name, "namespace", job.Namespace) - err := r.ConsoleClient.UpdateStackRun(stackRunID, console.StackRunAttributes{ - Status: console.StackStatusFailed, - }) - return ctrl.Result{}, err + status = console.StackStatusFailed } - return jitterRequeue(requeueAfter, jitter), nil - } - - if hasSucceeded(job) { + case hasSucceeded(job): logger.V(2).Info("stack run job succeeded", "name", job.Name, "namespace", job.Namespace) - err := r.ConsoleClient.UpdateStackRun(stackRunID, console.StackRunAttributes{ - Status: console.StackStatusSuccessful, - }) - - return ctrl.Result{}, err - } - - if hasFailed(job) { + run.Status.JobStatus = string(console.StackStatusSuccessful) + status = console.StackStatusSuccessful + case hasFailed(job): logger.V(2).Info("stack run job failed", "name", job.Name, "namespace", job.Namespace) - status, err := r.getJobPodStatus(ctx, job.Spec.Selector.MatchLabels) + status, err = r.getJobPodStatus(ctx, job.Spec.Selector.MatchLabels) if err != nil { + status = console.StackStatusFailed logger.Error(err, "unable to get job pod status") } - err = r.ConsoleClient.UpdateStackRun(stackRunID, console.StackRunAttributes{ - Status: status, - }) + run.Status.JobStatus = string(status) + } + + if err := r.ConsoleClient.UpdateStackRun(stackRun.ID, console.StackRunAttributes{ + Status: status, + JobRef: &console.NamespacedName{ + Name: job.Name, + Namespace: job.Namespace, + }, + }); err != nil { return ctrl.Result{}, err } + if isTerminalStackRunStatus(status) { + logger.V(2).Info("stack run reached terminal 
state, cleaning up CRD", "name", run.Name, "namespace", run.Namespace) + if err := r.Delete(ctx, run); err != nil && !apierrs.IsNotFound(err) { + return ctrl.Result{}, err + } + } + + utils.MarkCondition(run.SetCondition, v1alpha1.ReadyConditionType, metav1.ConditionTrue, v1alpha1.ReadyConditionReason, "") + utils.MarkCondition(run.SetCondition, v1alpha1.SynchronizedConditionType, metav1.ConditionTrue, v1alpha1.SynchronizedConditionReason, "") return ctrl.Result{}, nil } +// isTerminalStackRunStatus returns true when the stack run has reached a final state +// that requires no further reconciliation. +func isTerminalStackRunStatus(status console.StackStatus) bool { + switch status { + case console.StackStatusSuccessful, console.StackStatusCancelled: + return true + } + return false +} + +// GetRunResourceName returns a resource name used for a job and a secret connected to a given run. +func (r *StackRunJobReconciler) GetRunResourceName(run *console.StackRunMinimalFragment) string { + return fmt.Sprintf("stack-%s", run.ID) +} + func (r *StackRunJobReconciler) getJobPodStatus(ctx context.Context, selector map[string]string) (console.StackStatus, error) { pod, err := r.getJobPod(ctx, selector) if err != nil { @@ -133,10 +210,10 @@ func (r *StackRunJobReconciler) getJobPod(ctx context.Context, selector map[stri func (r *StackRunJobReconciler) getPodStatus(pod *corev1.Pod) (console.StackStatus, error) { statusIndex := algorithms.Index(pod.Status.ContainerStatuses, func(status corev1.ContainerStatus) bool { - return status.Name == stacks.DefaultJobContainer + return status.Name == stackRunDefaultJobContainer }) if statusIndex == -1 { - return console.StackStatusFailed, fmt.Errorf("no job container with name %s found", stacks.DefaultJobContainer) + return console.StackStatusFailed, fmt.Errorf("no job container with name %s found", stackRunDefaultJobContainer) } containerStatus := pod.Status.ContainerStatuses[statusIndex] @@ -164,8 +241,7 @@ func (r 
*StackRunJobReconciler) killJob(ctx context.Context, job *batchv1.Job) e func getExitCodeStatus(exitCode int32) console.StackStatus { switch exitCode { - case 64: - case 66: + case 64, 66: return console.StackStatusCancelled case 65: return console.StackStatusFailed @@ -174,10 +250,6 @@ func getExitCodeStatus(exitCode int32) console.StackStatus { return console.StackStatusFailed } -func getStackRunID(job *batchv1.Job) string { - return strings.TrimPrefix(job.Name, "stack-") -} - func isActiveJob(stackStatus console.StackStatus, job *batchv1.Job) bool { return stackStatus == console.StackStatusPending && job.Status.CompletionTime.IsZero() && !job.Status.StartTime.IsZero() } @@ -202,18 +274,9 @@ func (r *StackRunJobReconciler) isActiveJobPodFailed(ctx context.Context, stackS // SetupWithManager sets up the controller with the Manager. func (r *StackRunJobReconciler) SetupWithManager(mgr ctrl.Manager) error { - byAnnotation := predicate.NewPredicateFuncs(func(object k8sClient.Object) bool { - annotations := object.GetAnnotations() - if annotations == nil { - return false - } - - _, ok := annotations[jobSelector] - return ok - }) - return ctrl.NewControllerManagedBy(mgr). - For(&batchv1.Job{}). - WithEventFilter(byAnnotation). + WithOptions(controller.Options{MaxConcurrentReconciles: 1}). + For(&v1alpha1.StackRunJob{}, builder.WithPredicates(predicate.GenerationChangedPredicate{})). + Owns(&batchv1.Job{}). Complete(r) } diff --git a/internal/controller/stackrunjob_controller_test.go b/internal/controller/stackrunjob_controller_test.go index b756ee698..5a6bc67cc 100644 --- a/internal/controller/stackrunjob_controller_test.go +++ b/internal/controller/stackrunjob_controller_test.go @@ -2,7 +2,6 @@ package controller import ( "context" - "strings" "time" . 
"github.com/onsi/ginkgo/v2" @@ -10,6 +9,7 @@ import ( console "github.com/pluralsh/console/go/client" "github.com/samber/lo" "github.com/stretchr/testify/mock" + "github.com/vektah/gqlparser/v2/gqlerror" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -17,77 +17,37 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/reconcile" - "github.com/pluralsh/deployment-operator/pkg/controller/stacks" + "github.com/Yamashou/gqlgenc/clientv2" + "github.com/pluralsh/deployment-operator/api/v1alpha1" + "github.com/pluralsh/deployment-operator/pkg/test/common" "github.com/pluralsh/deployment-operator/pkg/test/mocks" ) -var _ = Describe("Stack Run Job Controller", Ordered, func() { +var _ = Describe("StackRunJob Controller", Ordered, func() { Context("When reconciling a resource", func() { const ( - completedName = "stack-1" - runningName = "stack-2" - namespace = "default" + runName = "stack-test-run" + namespace = "default" + runID = "test-run-123" + consoleURL = "https://console.plural.sh" + deployToken = "test-token" + stackRunName = "stack-test-run-123" ) ctx := context.Background() - - completedJobNamespacedName := types.NamespacedName{Name: completedName, Namespace: namespace} - runningJobNamespacedName := types.NamespacedName{Name: runningName, Namespace: namespace} - - completedJob := &batchv1.Job{} - runningJob := &batchv1.Job{} + runNamespacedName := types.NamespacedName{Name: runName, Namespace: namespace} BeforeAll(func() { - By("Creating stack run completed job") - err := kClient.Get(ctx, completedJobNamespacedName, completedJob) - if err != nil && errors.IsNotFound(err) { - resource := &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: completedName, - Namespace: namespace, - }, - Spec: batchv1.JobSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: stacks.DefaultJobContainer, - Image: "image:v1.0.0", - Args: []string{}, - }}, - 
RestartPolicy: corev1.RestartPolicyNever, - }, - }, - }, - Status: batchv1.JobStatus{ - CompletionTime: lo.ToPtr(metav1.NewTime(time.Now())), - Conditions: []batchv1.JobCondition{{ - Type: batchv1.JobComplete, - Status: corev1.ConditionTrue, - }}, - }, - } - Expect(kClient.Create(ctx, resource)).To(Succeed()) - } - - By("Creating stack run running job") - err = kClient.Get(ctx, runningJobNamespacedName, runningJob) + By("Creating StackRunJob") + err := kClient.Get(ctx, runNamespacedName, &v1alpha1.StackRunJob{}) if err != nil && errors.IsNotFound(err) { - resource := &batchv1.Job{ + resource := &v1alpha1.StackRunJob{ ObjectMeta: metav1.ObjectMeta{ - Name: runningName, + Name: runName, Namespace: namespace, }, - Spec: batchv1.JobSpec{ - Template: corev1.PodTemplateSpec{ - Spec: corev1.PodSpec{ - Containers: []corev1.Container{{ - Name: stacks.DefaultJobContainer, - Image: "image:v1.0.0", - Args: []string{}, - }}, - RestartPolicy: corev1.RestartPolicyNever, - }, - }, + Spec: v1alpha1.StackRunJobSpec{ + RunID: runID, }, } Expect(kClient.Create(ctx, resource)).To(Succeed()) @@ -95,86 +55,666 @@ var _ = Describe("Stack Run Job Controller", Ordered, func() { }) AfterAll(func() { - By("Cleanup stack run running job") - runningJob := &batchv1.Job{} - Expect(kClient.Get(ctx, runningJobNamespacedName, runningJob)).NotTo(HaveOccurred()) - Expect(kClient.Delete(ctx, runningJob)).To(Succeed()) + By("Cleaning up StackRunJob") + resource := &v1alpha1.StackRunJob{} + err := kClient.Get(ctx, runNamespacedName, resource) + if err == nil { + Expect(kClient.Delete(ctx, resource)).To(Succeed()) + } - By("Cleanup stack run completed job") - completedJob := &batchv1.Job{} - Expect(kClient.Get(ctx, completedJobNamespacedName, completedJob)).NotTo(HaveOccurred()) - Expect(kClient.Delete(ctx, completedJob)).To(Succeed()) - }) + // Clean up job if exists + job := &batchv1.Job{} + err = kClient.Get(ctx, runNamespacedName, job) + if err == nil { + Expect(kClient.Delete(ctx, job)).To(Succeed()) 
+ } - It("should exit without errors and try to update stack run status", func() { - runId := strings.TrimPrefix(completedName, "stack-") + // Clean up secret if exists + secret := &corev1.Secret{} + err = kClient.Get(ctx, runNamespacedName, secret) + if err == nil { + Expect(kClient.Delete(ctx, secret)).To(Succeed()) + } + }) + It("should create secret and job for StackRunJob", func() { fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(&console.StackRunMinimalFragment{ - ID: runId, - Status: console.StackStatusSuccessful, + fakeConsoleClient.On("GetStackRun", runID).Return(&console.StackRunMinimalFragment{ + ID: stackRunName, + Type: console.StackTypeTerraform, + Status: console.StackStatusPending, + Configuration: console.StackConfigurationFragment{ + Version: lo.ToPtr("1.8.2"), + }, }, nil) - fakeConsoleClient.On("UpdateStackRun", runId, mock.Anything).Return(&console.StackRunMinimalFragment{}, nil) + fakeConsoleClient.On("UpdateStackRun", mock.Anything, mock.Anything).Return(nil) reconciler := &StackRunJobReconciler{ Client: kClient, + ConsoleClient: fakeConsoleClient, Scheme: kClient.Scheme(), + ConsoleURL: consoleURL, + DeployToken: deployToken, + } + + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: runNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + // Verify secret was created + secret := &corev1.Secret{} + Expect(kClient.Get(ctx, runNamespacedName, secret)).NotTo(HaveOccurred()) + Expect(string(secret.Data[envConsoleURL])).Should(Equal(consoleURL)) + Expect(string(secret.Data[envConsoleToken])).Should(Equal(deployToken)) + Expect(string(secret.Data[envStackRunID])).Should(Equal(runID)) + + // Verify job was created + job := &batchv1.Job{} + Expect(kClient.Get(ctx, runNamespacedName, job)).NotTo(HaveOccurred()) + Expect(job.Spec.Template.Spec.Containers).Should(HaveLen(1)) + 
Expect(job.Spec.Template.Spec.Containers[0].Name).Should(Equal(stackRunDefaultJobContainer)) + + // Verify StackRunJob status + stackRunJob := &v1alpha1.StackRunJob{} + Expect(kClient.Get(ctx, runNamespacedName, stackRunJob)).NotTo(HaveOccurred()) + Expect(stackRunJob.Status.ID).ShouldNot(BeNil()) + Expect(*stackRunJob.Status.ID).Should(Equal(stackRunName)) + Expect(stackRunJob.Status.JobRef).ShouldNot(BeNil()) + Expect(stackRunJob.Status.JobRef.Name).Should(Equal(runName)) + }) + + It("should update status to successful when job completes", func() { + fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) + fakeConsoleClient.On("GetStackRun", runID).Return(&console.StackRunMinimalFragment{ + ID: stackRunName, + Type: console.StackTypeTerraform, + Status: console.StackStatusRunning, + Configuration: console.StackConfigurationFragment{ + Version: lo.ToPtr("1.8.2"), + }, + }, nil) + fakeConsoleClient.On("UpdateStackRun", stackRunName, console.StackRunAttributes{ + Status: console.StackStatusSuccessful, + JobRef: &console.NamespacedName{ + Name: runName, + Namespace: namespace, + }, + }).Return(nil) + + reconciler := &StackRunJobReconciler{ + Client: kClient, ConsoleClient: fakeConsoleClient, + Scheme: kClient.Scheme(), + ConsoleURL: consoleURL, + DeployToken: deployToken, } - _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: completedJobNamespacedName}) + + // Update job to completed state + job := &batchv1.Job{} + Expect(kClient.Get(ctx, runNamespacedName, job)).NotTo(HaveOccurred()) + Expect(common.MaybePatch(kClient, job, + func(p *batchv1.Job) { + now := metav1.Now() + p.Status.CompletionTime = &now + p.Status.Conditions = []batchv1.JobCondition{ + { + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + }, + } + })).To(Succeed()) + + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: runNamespacedName}) Expect(err).NotTo(HaveOccurred()) + + // Verify StackRunJob status + stackRunJob := &v1alpha1.StackRunJob{} + 
Expect(kClient.Get(ctx, runNamespacedName, stackRunJob)).NotTo(HaveOccurred()) + Expect(stackRunJob.Status.JobStatus).Should(Equal(string(console.StackStatusSuccessful))) }) - It("should exit without errors as stack run was already updated", func() { - runId := strings.TrimPrefix(completedName, "stack-") + It("should update status to failed when job fails", func() { + By("Creating a new StackRunJob for failure test") + failedRunName := "stack-test-run-failed" + failedRunID := "test-run-failed-456" + failedNamespacedName := types.NamespacedName{Name: failedRunName, Namespace: namespace} + + resource := &v1alpha1.StackRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: failedRunName, + Namespace: namespace, + }, + Spec: v1alpha1.StackRunJobSpec{ + RunID: failedRunID, + }, + } + Expect(kClient.Create(ctx, resource)).To(Succeed()) fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(&console.StackRunMinimalFragment{ - ID: runId, - Status: console.StackStatusSuccessful, + fakeConsoleClient.On("GetStackRun", failedRunID).Return(&console.StackRunMinimalFragment{ + ID: "stack-failed-123", + Type: console.StackTypeTerraform, + Status: console.StackStatusRunning, + Configuration: console.StackConfigurationFragment{ + Version: lo.ToPtr("1.8.2"), + }, }, nil) + fakeConsoleClient.On("UpdateStackRun", "stack-failed-123", mock.Anything).Return(nil) reconciler := &StackRunJobReconciler{ Client: kClient, - Scheme: kClient.Scheme(), ConsoleClient: fakeConsoleClient, + Scheme: kClient.Scheme(), + ConsoleURL: consoleURL, + DeployToken: deployToken, } - _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: completedJobNamespacedName}) + + // First reconcile to create resources + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: failedNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + // Create a pod for the job to get status from + job := &batchv1.Job{} + Expect(kClient.Get(ctx, 
failedNamespacedName, job)).NotTo(HaveOccurred()) + + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: failedRunName + "-pod", + Namespace: namespace, + Labels: job.Spec.Selector.MatchLabels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: stackRunDefaultJobContainer, + Image: "test-image", + }, + }, + }, + } + Expect(kClient.Create(ctx, pod)).To(Succeed()) + + // Update pod to terminated state with failure exit code + Expect(common.MaybePatch(kClient, pod, + func(p *corev1.Pod) { + p.Status.ContainerStatuses = []corev1.ContainerStatus{ + { + Name: stackRunDefaultJobContainer, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 65, // Failure exit code + }, + }, + }, + } + })).To(Succeed()) + + // Update job to failed state + Expect(common.MaybePatch(kClient, job, + func(p *batchv1.Job) { + now := metav1.Now() + p.Status.CompletionTime = &now + p.Status.Conditions = []batchv1.JobCondition{ + { + Type: batchv1.JobFailed, + Status: corev1.ConditionTrue, + }, + } + })).To(Succeed()) + + _, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: failedNamespacedName}) Expect(err).NotTo(HaveOccurred()) + + // Verify StackRunJob status + stackRunJob := &v1alpha1.StackRunJob{} + Expect(kClient.Get(ctx, failedNamespacedName, stackRunJob)).NotTo(HaveOccurred()) + Expect(stackRunJob.Status.JobStatus).Should(Equal(string(console.StackStatusFailed))) + + // Cleanup + Expect(kClient.Delete(ctx, pod)).To(Succeed()) + Expect(kClient.Delete(ctx, job)).To(Succeed()) + Expect(kClient.Delete(ctx, stackRunJob)).To(Succeed()) + secret := &corev1.Secret{} + err = kClient.Get(ctx, failedNamespacedName, secret) + if err == nil { + Expect(kClient.Delete(ctx, secret)).To(Succeed()) + } + }) + }) + + Context("Helper functions", func() { + It("should correctly determine exit code status", func() { + Expect(getExitCodeStatus(64)).Should(Equal(console.StackStatusCancelled)) + 
Expect(getExitCodeStatus(66)).Should(Equal(console.StackStatusCancelled)) + Expect(getExitCodeStatus(65)).Should(Equal(console.StackStatusFailed)) + Expect(getExitCodeStatus(0)).Should(Equal(console.StackStatusFailed)) + Expect(getExitCodeStatus(1)).Should(Equal(console.StackStatusFailed)) + }) + + It("should correctly identify active job", func() { + job := &batchv1.Job{ + Status: batchv1.JobStatus{ + StartTime: &metav1.Time{Time: time.Now().Add(-1 * time.Minute)}, + }, + } + Expect(isActiveJob(console.StackStatusPending, job)).Should(BeTrue()) + Expect(isActiveJob(console.StackStatusRunning, job)).Should(BeFalse()) + + jobWithCompletion := &batchv1.Job{ + Status: batchv1.JobStatus{ + StartTime: &metav1.Time{Time: time.Now().Add(-1 * time.Minute)}, + CompletionTime: &metav1.Time{Time: time.Now()}, + }, + } + Expect(isActiveJob(console.StackStatusPending, jobWithCompletion)).Should(BeFalse()) }) - It("should exit without errors as stack run status was already updated", func() { + It("should correctly identify job timeout", func() { + oldJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + StartTime: &metav1.Time{Time: time.Now().Add(-45 * time.Minute)}, + }, + } + Expect(isActiveJobTimout(console.StackStatusPending, oldJob)).Should(BeTrue()) + + recentJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + StartTime: &metav1.Time{Time: time.Now().Add(-1 * time.Minute)}, + }, + } + Expect(isActiveJobTimout(console.StackStatusPending, recentJob)).Should(BeFalse()) + }) + + It("should generate correct resource name", func() { + reconciler := &StackRunJobReconciler{} + run := &console.StackRunMinimalFragment{ + ID: "test-123", + } + Expect(reconciler.GetRunResourceName(run)).Should(Equal("stack-test-123")) + }) + }) + + Context("Secret reconciliation", func() { + It("should create secret data correctly", func() { + reconciler := &StackRunJobReconciler{ + ConsoleURL: "https://console.test.com", + DeployToken: "test-token-123", + } + runID := "run-456" + + data := 
reconciler.getRunSecretData(runID) + Expect(data).Should(HaveLen(3)) + Expect(data[envConsoleURL]).Should(Equal("https://console.test.com")) + Expect(data[envConsoleToken]).Should(Equal("test-token-123")) + Expect(data[envStackRunID]).Should(Equal("run-456")) + }) + + It("should verify secret data correctly", func() { + reconciler := &StackRunJobReconciler{ + ConsoleURL: "https://console.test.com", + DeployToken: "test-token-123", + } + runID := "run-789" + + secretData := map[string][]byte{ + envConsoleURL: []byte("https://console.test.com"), + envConsoleToken: []byte("test-token-123"), + envStackRunID: []byte("run-789"), + } + + Expect(reconciler.hasRunSecretData(secretData, runID)).Should(BeTrue()) + + // Wrong URL + wrongSecretData := map[string][]byte{ + envConsoleURL: []byte("https://wrong.url.com"), + envConsoleToken: []byte("test-token-123"), + envStackRunID: []byte("run-789"), + } + Expect(reconciler.hasRunSecretData(wrongSecretData, runID)).Should(BeFalse()) + + // Wrong run ID + wrongRunIDData := map[string][]byte{ + envConsoleURL: []byte("https://console.test.com"), + envConsoleToken: []byte("test-token-123"), + envStackRunID: []byte("wrong-run-id"), + } + Expect(reconciler.hasRunSecretData(wrongRunIDData, runID)).Should(BeFalse()) + }) + }) + + Context("Pod status checks", func() { + It("should get pod status from exit code", func() { + reconciler := &StackRunJobReconciler{} + + pod := &corev1.Pod{ + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: stackRunDefaultJobContainer, + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 65, + }, + }, + }, + }, + }, + } + + status, err := reconciler.getPodStatus(pod) + Expect(err).NotTo(HaveOccurred()) + Expect(status).Should(Equal(console.StackStatusFailed)) + }) + + It("should return error when container not found", func() { + reconciler := &StackRunJobReconciler{} + + pod := &corev1.Pod{ + Status: corev1.PodStatus{ + 
ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "wrong-container-name", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 0, + }, + }, + }, + }, + }, + } + + _, err := reconciler.getPodStatus(pod) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).Should(ContainSubstring("no job container")) + }) + + It("should return error when container not terminated", func() { + reconciler := &StackRunJobReconciler{} + + pod := &corev1.Pod{ + Status: corev1.PodStatus{ + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: stackRunDefaultJobContainer, + State: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{}, + }, + }, + }, + }, + } + + _, err := reconciler.getPodStatus(pod) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).Should(ContainSubstring("not in terminated state")) + }) + }) + + Context("Job timeout and cancellation", func() { + const ( + timeoutRunName = "stack-test-timeout" + timeoutRunID = "test-timeout-789" + namespace = "default" + ) + + ctx := context.Background() + timeoutNamespacedName := types.NamespacedName{Name: timeoutRunName, Namespace: namespace} + + It("should handle job timeout when pending too long", func() { + By("Creating StackRunJob for timeout test") + resource := &v1alpha1.StackRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: timeoutRunName, + Namespace: namespace, + }, + Spec: v1alpha1.StackRunJobSpec{ + RunID: timeoutRunID, + }, + } + Expect(kClient.Create(ctx, resource)).To(Succeed()) + fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(&console.StackRunMinimalFragment{ - ID: "2", - Status: console.StackStatusFailed, + fakeConsoleClient.On("GetStackRun", timeoutRunID).Return(&console.StackRunMinimalFragment{ + ID: "stack-timeout-123", + Type: console.StackTypeTerraform, + Status: console.StackStatusPending, + Configuration: console.StackConfigurationFragment{ + Version: lo.ToPtr("1.8.2"), + }, }, 
nil) + fakeConsoleClient.On("UpdateStackRun", "stack-timeout-123", console.StackRunAttributes{ + Status: console.StackStatusPending, + JobRef: &console.NamespacedName{ + Name: timeoutRunName, + Namespace: namespace, + }, + }).Return(nil).Once() + fakeConsoleClient.On("UpdateStackRun", "stack-timeout-123", console.StackRunAttributes{ + Status: console.StackStatusFailed, + JobRef: &console.NamespacedName{ + Name: timeoutRunName, + Namespace: namespace, + }, + }).Return(nil).Once() reconciler := &StackRunJobReconciler{ Client: kClient, + ConsoleClient: fakeConsoleClient, Scheme: kClient.Scheme(), + ConsoleURL: "https://console.test.com", + DeployToken: "test-token", + } + + // First reconcile to create resources + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: timeoutNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + // Update job to be old (started more than 40 minutes ago) + job := &batchv1.Job{} + Expect(kClient.Get(ctx, timeoutNamespacedName, job)).NotTo(HaveOccurred()) + Expect(common.MaybePatch(kClient, job, + func(p *batchv1.Job) { + oldTime := metav1.Time{Time: time.Now().Add(-45 * time.Minute)} + p.Status.StartTime = &oldTime + })).To(Succeed()) + + // Reconcile again to trigger timeout + _, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: timeoutNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + // Verify status was updated to failed + stackRunJob := &v1alpha1.StackRunJob{} + Expect(kClient.Get(ctx, timeoutNamespacedName, stackRunJob)).NotTo(HaveOccurred()) + Expect(stackRunJob.Status.JobStatus).Should(Equal(string(console.StackStatusFailed))) + + // Cleanup + Expect(kClient.Delete(ctx, stackRunJob)).To(Succeed()) + secret := &corev1.Secret{} + err = kClient.Get(ctx, timeoutNamespacedName, secret) + if err == nil { + Expect(kClient.Delete(ctx, secret)).To(Succeed()) + } + }) + }) + + Context("Reconciliation edge cases", func() { + const ( + edgeCaseRunName = "stack-edge-case" + edgeCaseRunID = 
"test-edge-999" + namespace = "default" + ) + + ctx := context.Background() + edgeCaseNamespacedName := types.NamespacedName{Name: edgeCaseRunName, Namespace: namespace} + + It("should handle StackRun not found in Console", func() { + By("Creating StackRunJob") + resource := &v1alpha1.StackRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: edgeCaseRunName, + Namespace: namespace, + }, + Spec: v1alpha1.StackRunJobSpec{ + RunID: edgeCaseRunID, + }, + } + Expect(kClient.Create(ctx, resource)).To(Succeed()) + + fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) + // Return a not found error + notFoundErr := clientv2.ErrorResponse{ + GqlErrors: &gqlerror.List{ + { + Message: "could not find resource", + }, + }, + } + fakeConsoleClient.On("GetStackRun", edgeCaseRunID).Return(nil, ¬FoundErr) + + reconciler := &StackRunJobReconciler{ + Client: kClient, ConsoleClient: fakeConsoleClient, + Scheme: kClient.Scheme(), + ConsoleURL: "https://console.test.com", + DeployToken: "test-token", } - _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: runningJobNamespacedName}) + + // Reconcile should requeue without error when stack run not found + result, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: edgeCaseNamespacedName}) Expect(err).NotTo(HaveOccurred()) + Expect(result.RequeueAfter).Should(BeNumerically(">", 0)) + + // Cleanup + stackRunJob := &v1alpha1.StackRunJob{} + err = kClient.Get(ctx, edgeCaseNamespacedName, stackRunJob) + if err == nil { + Expect(kClient.Delete(ctx, stackRunJob)).To(Succeed()) + } }) - It("should exit without errors as stack run job is still running", func() { - runId := strings.TrimPrefix(runningName, "stack-") + It("should update job ref when job is still running", func() { + By("Creating StackRunJob") + runningName := "stack-running" + runningID := "running-123" + resource := &v1alpha1.StackRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: runningName, + Namespace: namespace, + }, + Spec: 
v1alpha1.StackRunJobSpec{ + RunID: runningID, + }, + } + Expect(kClient.Create(ctx, resource)).To(Succeed()) fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(&console.StackRunMinimalFragment{ - ID: runId, + fakeConsoleClient.On("GetStackRun", runningID).Return(&console.StackRunMinimalFragment{ + ID: "stack-running-456", + Type: console.StackTypeTerraform, Status: console.StackStatusRunning, + Configuration: console.StackConfigurationFragment{ + Version: lo.ToPtr("1.8.2"), + }, }, nil) + fakeConsoleClient.On("UpdateStackRun", "stack-running-456", console.StackRunAttributes{ + Status: console.StackStatusRunning, + JobRef: &console.NamespacedName{ + Name: runningName, + Namespace: namespace, + }, + }).Return(nil) reconciler := &StackRunJobReconciler{ Client: kClient, - Scheme: kClient.Scheme(), ConsoleClient: fakeConsoleClient, + Scheme: kClient.Scheme(), + ConsoleURL: "https://console.test.com", + DeployToken: "test-token", } - _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: runningJobNamespacedName}) + + runningNamespacedName := types.NamespacedName{Name: runningName, Namespace: namespace} + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: runningNamespacedName}) Expect(err).NotTo(HaveOccurred()) + + // Job should be created but status should not be updated yet + stackRunJob := &v1alpha1.StackRunJob{} + Expect(kClient.Get(ctx, runningNamespacedName, stackRunJob)).NotTo(HaveOccurred()) + Expect(stackRunJob.Status.JobRef).ShouldNot(BeNil()) + Expect(stackRunJob.Status.JobStatus).Should(BeEmpty()) + + // Cleanup + job := &batchv1.Job{} + err = kClient.Get(ctx, runningNamespacedName, job) + if err == nil { + Expect(kClient.Delete(ctx, job)).To(Succeed()) + } + Expect(kClient.Delete(ctx, stackRunJob)).To(Succeed()) + secret := &corev1.Secret{} + err = kClient.Get(ctx, runningNamespacedName, secret) + if err == nil { + Expect(kClient.Delete(ctx, 
secret)).To(Succeed()) + } + }) + }) + + Context("Job status helper functions", func() { + It("should correctly identify succeeded jobs", func() { + succeededJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + Conditions: []batchv1.JobCondition{ + { + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + }, + }, + }, + } + Expect(hasSucceeded(succeededJob)).Should(BeTrue()) + + failedJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + Conditions: []batchv1.JobCondition{ + { + Type: batchv1.JobFailed, + Status: corev1.ConditionTrue, + }, + }, + }, + } + Expect(hasSucceeded(failedJob)).Should(BeFalse()) + }) + + It("should correctly identify failed jobs", func() { + failedJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + Conditions: []batchv1.JobCondition{ + { + Type: batchv1.JobFailed, + Status: corev1.ConditionTrue, + }, + }, + }, + } + Expect(hasFailed(failedJob)).Should(BeTrue()) + + succeededJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + Conditions: []batchv1.JobCondition{ + { + Type: batchv1.JobComplete, + Status: corev1.ConditionTrue, + }, + }, + }, + } + Expect(hasFailed(succeededJob)).Should(BeFalse()) + + runningJob := &batchv1.Job{ + Status: batchv1.JobStatus{ + Conditions: []batchv1.JobCondition{}, + }, + } + Expect(hasFailed(runningJob)).Should(BeFalse()) }) }) }) diff --git a/pkg/controller/stacks/job.go b/internal/controller/stackrunjob_job.go similarity index 53% rename from pkg/controller/stacks/job.go rename to internal/controller/stackrunjob_job.go index d15fc6914..76c493e4b 100644 --- a/pkg/controller/stacks/job.go +++ b/internal/controller/stackrunjob_job.go @@ -1,4 +1,4 @@ -package stacks +package controller import ( "context" @@ -6,144 +6,86 @@ import ( "os" "strings" - "github.com/pluralsh/deployment-operator/pkg/common" - "k8s.io/apimachinery/pkg/api/resource" - console "github.com/pluralsh/console/go/client" - "github.com/pluralsh/deployment-operator/internal/metrics" - "github.com/pluralsh/deployment-operator/internal/utils" - 
consoleclient "github.com/pluralsh/deployment-operator/pkg/client" + "github.com/pluralsh/deployment-operator/api/v1alpha1" + "github.com/pluralsh/deployment-operator/pkg/common" "github.com/pluralsh/polly/algorithms" "github.com/samber/lo" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" apierrs "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/log" ) const ( - podDefaultContainerAnnotation = "kubectl.kubernetes.io/default-container" - jobSelector = "stackrun.deployments.plural.sh" - DefaultJobContainer = "default" - defaultJobVolumeName = "default" - defaultJobVolumePath = "/plural" - defaultJobTmpVolumeName = "default-tmp" - defaultJobTmpVolumePath = "/tmp" - nonRootUID = int64(65532) - nonRootGID = nonRootUID - defaultContainerImage = "ghcr.io/pluralsh/harness" + stackRunjobSelector = "stackrun.deployments.plural.sh" + stackRunDefaultJobContainer = "default" + stackRunDefaultContainerImage = "ghcr.io/pluralsh/harness" ) var ( - defaultContainerVersions = map[console.StackType]string{ + stackRunDefaultContainerVersions = map[console.StackType]string{ console.StackTypeTerraform: "1.8.2", console.StackTypeAnsible: "latest", } - defaultJobVolume = corev1.Volume{ + stackRunDefaultJobVolume = corev1.Volume{ Name: defaultJobVolumeName, VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{}, }, } - defaultJobContainerVolumeMount = corev1.VolumeMount{ + stackRunDefaultJobContainerVolumeMount = corev1.VolumeMount{ Name: defaultJobVolumeName, MountPath: defaultJobVolumePath, } - defaultJobTmpVolume = corev1.Volume{ + stackRunDefaultJobTmpVolume = corev1.Volume{ Name: defaultJobTmpVolumeName, VolumeSource: corev1.VolumeSource{ EmptyDir: &corev1.EmptyDirVolumeSource{}, }, } - defaultJobTmpContainerVolumeMount = corev1.VolumeMount{ + stackRunDefaultJobTmpContainerVolumeMount = corev1.VolumeMount{ Name: 
defaultJobTmpVolumeName, MountPath: defaultJobTmpVolumePath, } - defaultImageTag = "0.6.18" + stackRunDefaultImageTag = "0.6.18" ) func init() { if os.Getenv("IMAGE_TAG") != "" { - defaultImageTag = os.Getenv("IMAGE_TAG") + stackRunDefaultImageTag = os.Getenv("IMAGE_TAG") } } -func (r *StackReconciler) reconcileRunJob(ctx context.Context, run *console.StackRunMinimalFragment) (*batchv1.Job, error) { - logger := log.FromContext(ctx) - - name := GetRunResourceName(run) - jobSpec := getRunJobSpec(name, run.JobSpec) - namespace := r.GetRunResourceNamespace(jobSpec) - +func (r *StackRunJobReconciler) reconcileJob(ctx context.Context, run *v1alpha1.StackRunJob, stackRun *console.StackRunMinimalFragment) (*batchv1.Job, error) { foundJob := &batchv1.Job{} - if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, foundJob); err != nil { + if err := r.Get(ctx, types.NamespacedName{Name: run.Name, Namespace: run.Namespace}, foundJob); err != nil { if !apierrs.IsNotFound(err) { return nil, err } - secret, err := r.upsertRunSecret(ctx, name, namespace, run.ID) + jobSpec := common.GetRunJobSpec(run.Name, stackRun.JobSpec) + job, err := r.GenerateRunJob(stackRun, jobSpec, run.Name, run.Namespace) if err != nil { return nil, err } - - job, err := r.GenerateRunJob(run, jobSpec, name, namespace) - if err != nil { + if err := r.Create(ctx, job); err != nil { return nil, err } - logger.V(2).Info("creating job for stack run", "id", run.ID, "namespace", job.Namespace, "name", job.Name) - if err := r.k8sClient.Create(ctx, job); err != nil { - logger.Error(err, "unable to create job") - return nil, err - } - - if err := utils.TryAddOwnerRef(ctx, r.k8sClient, job, secret, r.scheme); err != nil { - logger.Error(err, "error setting owner reference for job secret") - return nil, err - } - - metrics.Record().StackRunJobCreation() - if err := r.consoleClient.UpdateStackRun(run.ID, console.StackRunAttributes{ - Status: run.Status, - JobRef: &console.NamespacedName{ 
- Name: job.Name, - Namespace: job.Namespace, - }, - }); err != nil { - return nil, err - } - return job, nil } return foundJob, nil } -// GetRunResourceName returns a resource name used for a job and a secret connected to a given run. -func GetRunResourceName(run *console.StackRunMinimalFragment) string { - return fmt.Sprintf("stack-%s", run.ID) -} - -// GetRunResourceNamespace returns a resource namespace used for a job and a secret connected to a given run. -func (r *StackReconciler) GetRunResourceNamespace(jobSpec *batchv1.JobSpec) (namespace string) { - if jobSpec != nil { - namespace = jobSpec.Template.Namespace - } - - if namespace == "" { - namespace = r.namespace - } - - return -} - -func (r *StackReconciler) GenerateRunJob(run *console.StackRunMinimalFragment, jobSpec *batchv1.JobSpec, name, namespace string) (*batchv1.Job, error) { +func (r *StackRunJobReconciler) GenerateRunJob(run *console.StackRunMinimalFragment, jobSpec *batchv1.JobSpec, name, namespace string) (*batchv1.Job, error) { var err error // If user-defined job spec was not available initialize it here. 
if jobSpec == nil { @@ -157,7 +99,7 @@ func (r *StackReconciler) GenerateRunJob(run *console.StackRunMinimalFragment, j if jobSpec.Template.Annotations == nil { jobSpec.Template.Annotations = map[string]string{} } - jobSpec.Template.Annotations[podDefaultContainerAnnotation] = DefaultJobContainer + jobSpec.Template.Annotations[podDefaultContainerAnnotation] = stackRunDefaultJobContainer jobSpec.Template.Spec.RestartPolicy = corev1.RestartPolicyNever @@ -171,57 +113,104 @@ func (r *StackReconciler) GenerateRunJob(run *console.StackRunMinimalFragment, j return nil, err } - jobSpec.Template.Spec.Volumes = ensureDefaultVolumes(jobSpec.Template.Spec.Volumes) + jobSpec.Template.Spec.Volumes = r.ensureDefaultVolumes(jobSpec.Template.Spec.Volumes) - jobSpec.Template.Spec.SecurityContext = ensureDefaultPodSecurityContext(jobSpec.Template.Spec.SecurityContext) + jobSpec.Template.Spec.SecurityContext = r.ensureDefaultPodSecurityContext(jobSpec.Template.Spec.SecurityContext) return &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, - Annotations: map[string]string{jobSelector: name}, - Labels: map[string]string{jobSelector: name}, + Annotations: map[string]string{stackRunjobSelector: name}, + Labels: map[string]string{stackRunjobSelector: name}, }, Spec: *jobSpec, }, nil } -func getRunJobSpec(name string, jobSpecFragment *console.JobSpecFragment) *batchv1.JobSpec { - if jobSpecFragment == nil { - return nil +func (r *StackRunJobReconciler) ensureDefaultPodSecurityContext(psc *corev1.PodSecurityContext) *corev1.PodSecurityContext { + if psc != nil { + return psc } - var jobSpec *batchv1.JobSpec - var err error - if jobSpecFragment.Raw != nil && *jobSpecFragment.Raw != "null" { - jobSpec, err = consoleclient.JobSpecFromYaml(*jobSpecFragment.Raw) - if err != nil { - return nil - } - } else { - jobSpec = &batchv1.JobSpec{ - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: jobSpecFragment.Namespace, - Labels: 
consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Labels), - Annotations: consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Annotations), - }, - Spec: corev1.PodSpec{ - Containers: consoleclient.ContainersFromContainerSpecFragments(name, jobSpecFragment.Containers, jobSpecFragment.Requests), - }, - }, - } - if jobSpecFragment.ServiceAccount != nil { - jobSpec.Template.Spec.ServiceAccountName = *jobSpecFragment.ServiceAccount + return &corev1.PodSecurityContext{ + RunAsNonRoot: lo.ToPtr(true), + RunAsUser: lo.ToPtr(nonRootUID), + RunAsGroup: lo.ToPtr(nonRootGID), + } +} + +func (r *StackRunJobReconciler) ensureDefaultVolumes(volumes []corev1.Volume) []corev1.Volume { + return append( + algorithms.Filter(volumes, func(v corev1.Volume) bool { + switch v.Name { + case defaultJobVolumeName: + case defaultJobTmpVolumeName: + return false + } + + return true + }), + stackRunDefaultJobVolume, + stackRunDefaultJobTmpVolume, + ) +} + +func (r *StackRunJobReconciler) ensureDefaultContainerResourcesRequests(containers []corev1.Container, run *console.StackRunMinimalFragment) ([]corev1.Container, error) { + if run.JobSpec == nil || run.JobSpec.Requests == nil { + return containers, nil + } + if run.JobSpec.Requests.Requests == nil && run.JobSpec.Requests.Limits == nil { + return containers, nil + } + + for i, container := range containers { + if run.JobSpec.Requests.Requests != nil { + if len(container.Resources.Requests) == 0 { + containers[i].Resources.Requests = map[corev1.ResourceName]resource.Quantity{} + } + if run.JobSpec.Requests.Requests.CPU != nil { + cpu, err := resource.ParseQuantity(*run.JobSpec.Requests.Requests.CPU) + if err != nil { + return nil, err + } + containers[i].Resources.Requests[corev1.ResourceCPU] = cpu + } + if run.JobSpec.Requests.Requests.Memory != nil { + memory, err := resource.ParseQuantity(*run.JobSpec.Requests.Requests.Memory) + if err != nil { + return nil, err + } + containers[i].Resources.Requests[corev1.ResourceMemory] = memory 
+ } + } + if run.JobSpec.Requests.Limits != nil { + if len(container.Resources.Limits) == 0 { + containers[i].Resources.Limits = map[corev1.ResourceName]resource.Quantity{} + } + if run.JobSpec.Requests.Limits.CPU != nil { + cpu, err := resource.ParseQuantity(*run.JobSpec.Requests.Limits.CPU) + if err != nil { + return nil, err + } + containers[i].Resources.Limits[corev1.ResourceCPU] = cpu + } + if run.JobSpec.Requests.Limits.Memory != nil { + memory, err := resource.ParseQuantity(*run.JobSpec.Requests.Limits.Memory) + if err != nil { + return nil, err + } + containers[i].Resources.Limits[corev1.ResourceMemory] = memory + } } } - return jobSpec + + return containers, nil } -func (r *StackReconciler) ensureDefaultContainer(containers []corev1.Container, run *console.StackRunMinimalFragment) []corev1.Container { +func (r *StackRunJobReconciler) ensureDefaultContainer(containers []corev1.Container, run *console.StackRunMinimalFragment) []corev1.Container { if index := algorithms.Index(containers, func(container corev1.Container) bool { - return container.Name == DefaultJobContainer + return container.Name == stackRunDefaultJobContainer }); index == -1 { containers = append(containers, r.getDefaultContainer(run)) } else { @@ -231,26 +220,74 @@ func (r *StackReconciler) ensureDefaultContainer(containers []corev1.Container, containers[index].EnvFrom = r.getDefaultContainerEnvFrom(run) - containers[index].VolumeMounts = ensureDefaultVolumeMounts(containers[index].VolumeMounts) + containers[index].VolumeMounts = r.ensureDefaultVolumeMounts(containers[index].VolumeMounts) + containers[index].SecurityContext = r.ensureDefaultContainerSecurityContext(containers[index].SecurityContext, run) } return containers } -func (r *StackReconciler) getDefaultContainer(run *console.StackRunMinimalFragment) corev1.Container { +func (r *StackRunJobReconciler) ensureDefaultVolumeMounts(mounts []corev1.VolumeMount) []corev1.VolumeMount { + return append( + algorithms.Filter(mounts, func(v 
corev1.VolumeMount) bool { + switch v.Name { + case defaultJobVolumeName, + defaultJobTmpVolumeName: + return false + } + + return true + }), + stackRunDefaultJobContainerVolumeMount, + stackRunDefaultJobTmpContainerVolumeMount, + ) +} + +func (r *StackRunJobReconciler) getDefaultContainerEnvFrom(run *console.StackRunMinimalFragment) []corev1.EnvFromSource { + return []corev1.EnvFromSource{ + { + SecretRef: &corev1.SecretEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: r.GetRunResourceName(run), + }, + }, + }, + } +} + +func (r *StackRunJobReconciler) getDefaultContainer(run *console.StackRunMinimalFragment) corev1.Container { return corev1.Container{ - Name: DefaultJobContainer, + Name: stackRunDefaultJobContainer, Image: r.getDefaultContainerImage(run), VolumeMounts: []corev1.VolumeMount{ defaultJobContainerVolumeMount, defaultJobTmpContainerVolumeMount, }, - SecurityContext: ensureDefaultContainerSecurityContext(nil, run), + SecurityContext: r.ensureDefaultContainerSecurityContext(nil, run), Env: make([]corev1.EnvVar, 0), EnvFrom: r.getDefaultContainerEnvFrom(run), } } -func (r *StackReconciler) getDefaultContainerImage(run *console.StackRunMinimalFragment) string { +func (r *StackRunJobReconciler) ensureDefaultContainerSecurityContext(sc *corev1.SecurityContext, run *console.StackRunMinimalFragment) *corev1.SecurityContext { + if sc != nil { + if run != nil && run.Type == console.StackTypeAnsible { + sc.ReadOnlyRootFilesystem = lo.ToPtr(false) + } + return sc + } + + readOnlyRootFilesystem := run == nil || run.Type != console.StackTypeAnsible + + return &corev1.SecurityContext{ + AllowPrivilegeEscalation: lo.ToPtr(false), + ReadOnlyRootFilesystem: lo.ToPtr(readOnlyRootFilesystem), + RunAsNonRoot: lo.ToPtr(true), + RunAsUser: lo.ToPtr(nonRootUID), + RunAsGroup: lo.ToPtr(nonRootGID), + } +} + +func (r *StackRunJobReconciler) getDefaultContainerImage(run *console.StackRunMinimalFragment) string { // In case image is not provided, it 
will use our default image. // Image name format: : // Note: User has to make sure that the tag is correct and matches our naming scheme. @@ -280,169 +317,38 @@ func (r *StackReconciler) getDefaultContainerImage(run *console.StackRunMinimalF return fmt.Sprintf("%s:%s-%s-%s", r.getImage(run), r.getTag(run), strings.ToLower(string(run.Type)), r.getVersion(run)) } -func (r *StackReconciler) hasCustomImage(run *console.StackRunMinimalFragment) bool { +func (r *StackRunJobReconciler) hasCustomImage(run *console.StackRunMinimalFragment) bool { return run.Configuration.Image != nil && len(*run.Configuration.Image) > 0 } -func (r *StackReconciler) getImage(run *console.StackRunMinimalFragment) string { +func (r *StackRunJobReconciler) getImage(run *console.StackRunMinimalFragment) string { if r.hasCustomImage(run) { return *run.Configuration.Image } - return getDefaultContainerImage() + return common.GetConfigurationManager().SwapBaseRegistry(stackRunDefaultContainerImage) } -func (r *StackReconciler) hasCustomVersion(run *console.StackRunMinimalFragment) bool { +func (r *StackRunJobReconciler) hasCustomVersion(run *console.StackRunMinimalFragment) bool { return run.Configuration.Version != nil && len(*run.Configuration.Version) > 0 } -func (r *StackReconciler) getVersion(run *console.StackRunMinimalFragment) string { +func (r *StackRunJobReconciler) getVersion(run *console.StackRunMinimalFragment) string { if r.hasCustomVersion(run) { return *run.Configuration.Version } - return defaultContainerVersions[run.Type] + return stackRunDefaultContainerVersions[run.Type] } -func (r *StackReconciler) hasCustomTag(run *console.StackRunMinimalFragment) bool { +func (r *StackRunJobReconciler) hasCustomTag(run *console.StackRunMinimalFragment) bool { return run.Configuration.Tag != nil && len(*run.Configuration.Tag) > 0 } -func (r *StackReconciler) getTag(run *console.StackRunMinimalFragment) string { +func (r *StackRunJobReconciler) getTag(run *console.StackRunMinimalFragment) 
string { if r.hasCustomTag(run) { return *run.Configuration.Tag } - return defaultImageTag -} - -func (r *StackReconciler) getDefaultContainerEnvFrom(run *console.StackRunMinimalFragment) []corev1.EnvFromSource { - return []corev1.EnvFromSource{ - { - SecretRef: &corev1.SecretEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: GetRunResourceName(run), - }, - }, - }, - } -} - -func ensureDefaultVolumeMounts(mounts []corev1.VolumeMount) []corev1.VolumeMount { - return append( - algorithms.Filter(mounts, func(v corev1.VolumeMount) bool { - switch v.Name { - case defaultJobVolumeName: - case defaultJobTmpVolumeName: - return false - } - - return true - }), - defaultJobContainerVolumeMount, - defaultJobTmpContainerVolumeMount, - ) -} - -func ensureDefaultVolumes(volumes []corev1.Volume) []corev1.Volume { - return append( - algorithms.Filter(volumes, func(v corev1.Volume) bool { - switch v.Name { - case defaultJobVolumeName: - case defaultJobTmpVolumeName: - return false - } - - return true - }), - defaultJobVolume, - defaultJobTmpVolume, - ) -} - -func ensureDefaultPodSecurityContext(psc *corev1.PodSecurityContext) *corev1.PodSecurityContext { - if psc != nil { - return psc - } - - return &corev1.PodSecurityContext{ - RunAsNonRoot: lo.ToPtr(true), - RunAsUser: lo.ToPtr(nonRootUID), - RunAsGroup: lo.ToPtr(nonRootGID), - } -} - -func ensureDefaultContainerSecurityContext(sc *corev1.SecurityContext, run *console.StackRunMinimalFragment) *corev1.SecurityContext { - if sc != nil { - if run != nil && run.Type == console.StackTypeAnsible { - sc.ReadOnlyRootFilesystem = lo.ToPtr(false) - } - return sc - } - - readOnlyRootFilesystem := run == nil || run.Type != console.StackTypeAnsible - - return &corev1.SecurityContext{ - AllowPrivilegeEscalation: lo.ToPtr(false), - ReadOnlyRootFilesystem: lo.ToPtr(readOnlyRootFilesystem), - RunAsNonRoot: lo.ToPtr(true), - RunAsUser: lo.ToPtr(nonRootUID), - RunAsGroup: lo.ToPtr(nonRootGID), - } -} - -func (r 
*StackReconciler) ensureDefaultContainerResourcesRequests(containers []corev1.Container, run *console.StackRunMinimalFragment) ([]corev1.Container, error) { - if run.JobSpec == nil || run.JobSpec.Requests == nil { - return containers, nil - } - if run.JobSpec.Requests.Requests == nil && run.JobSpec.Requests.Limits == nil { - return containers, nil - } - - for i, container := range containers { - if run.JobSpec.Requests.Requests != nil { - if len(container.Resources.Requests) == 0 { - containers[i].Resources.Requests = map[corev1.ResourceName]resource.Quantity{} - } - if run.JobSpec.Requests.Requests.CPU != nil { - cpu, err := resource.ParseQuantity(*run.JobSpec.Requests.Requests.CPU) - if err != nil { - return nil, err - } - containers[i].Resources.Requests[corev1.ResourceCPU] = cpu - } - if run.JobSpec.Requests.Requests.Memory != nil { - memory, err := resource.ParseQuantity(*run.JobSpec.Requests.Requests.Memory) - if err != nil { - return nil, err - } - containers[i].Resources.Requests[corev1.ResourceMemory] = memory - } - } - if run.JobSpec.Requests.Limits != nil { - if len(container.Resources.Limits) == 0 { - containers[i].Resources.Limits = map[corev1.ResourceName]resource.Quantity{} - } - if run.JobSpec.Requests.Limits.CPU != nil { - cpu, err := resource.ParseQuantity(*run.JobSpec.Requests.Limits.CPU) - if err != nil { - return nil, err - } - containers[i].Resources.Limits[corev1.ResourceCPU] = cpu - } - if run.JobSpec.Requests.Limits.Memory != nil { - memory, err := resource.ParseQuantity(*run.JobSpec.Requests.Limits.Memory) - if err != nil { - return nil, err - } - containers[i].Resources.Limits[corev1.ResourceMemory] = memory - } - } - } - - return containers, nil -} - -func getDefaultContainerImage() string { - return common.GetConfigurationManager().SwapBaseRegistry(defaultContainerImage) + return stackRunDefaultImageTag } diff --git a/pkg/controller/stacks/job_test.go b/internal/controller/stackrunjob_job_test.go similarity index 90% rename from 
pkg/controller/stacks/job_test.go rename to internal/controller/stackrunjob_job_test.go index b7736e22b..1a5571bb7 100644 --- a/pkg/controller/stacks/job_test.go +++ b/internal/controller/stackrunjob_job_test.go @@ -1,31 +1,31 @@ -package stacks +package controller import ( "fmt" "testing" - "time" + "github.com/pluralsh/deployment-operator/pkg/common" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" console "github.com/pluralsh/console/go/client" + "github.com/pluralsh/deployment-operator/pkg/test/mocks" "github.com/samber/lo" "github.com/stretchr/testify/assert" "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/pluralsh/deployment-operator/pkg/test/mocks" ) const defaultName = "default" func TestGetDefaultContainerImage(t *testing.T) { - var kClient client.Client fakeConsoleClient := mocks.NewClientMock(t) - namespace := "default" - reconciler := NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") + reconciler := StackRunJobReconciler{ + ConsoleClient: fakeConsoleClient, + Scheme: scheme.Scheme, + } + cases := []struct { name string run *console.StackRunMinimalFragment @@ -113,11 +113,9 @@ func TestGetDefaultContainerImage(t *testing.T) { } func TestGenerateRunJob(t *testing.T) { - var kClient client.Client - fakeConsoleClient := mocks.NewClientMock(t) namespace := defaultName runID := "1" - reconciler := NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") + reconciler := StackRunJobReconciler{} cases := []struct { name string run *console.StackRunMinimalFragment @@ -221,8 +219,8 @@ func TestGenerateRunJob(t *testing.T) { for _, test := range cases { t.Run(test.name, func(t *testing.T) { - name := GetRunResourceName(test.run) - jobSpec := getRunJobSpec(name, test.run.JobSpec) + name := reconciler.GetRunResourceName(test.run) + jobSpec 
:= common.GetRunJobSpec(name, test.run.JobSpec) job, err := reconciler.GenerateRunJob(test.run, jobSpec, test.name, namespace) assert.Nil(t, err) assert.NotNil(t, job) @@ -232,13 +230,15 @@ func TestGenerateRunJob(t *testing.T) { } func genDefaultJobSpec(namespace, name, runID string) batchv1.JobSpec { + r := StackRunJobReconciler{} + run := &console.StackRunMinimalFragment{Type: console.StackTypeTerraform} return batchv1.JobSpec{ Template: corev1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, Labels: map[string]string{}, - Annotations: map[string]string{podDefaultContainerAnnotation: DefaultJobContainer}, + Annotations: map[string]string{podDefaultContainerAnnotation: "default"}, }, Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -257,19 +257,19 @@ func genDefaultJobSpec(namespace, name, runID string) batchv1.JobSpec { }, Env: make([]corev1.EnvVar, 0), Resources: corev1.ResourceRequirements{}, - VolumeMounts: ensureDefaultVolumeMounts(nil), + VolumeMounts: r.ensureDefaultVolumeMounts(nil), TerminationMessagePath: "", TerminationMessagePolicy: "", ImagePullPolicy: "", - SecurityContext: ensureDefaultContainerSecurityContext(nil, nil), + SecurityContext: r.ensureDefaultContainerSecurityContext(nil, run), Stdin: false, StdinOnce: false, TTY: false, }, }, RestartPolicy: corev1.RestartPolicyNever, - Volumes: ensureDefaultVolumes(nil), - SecurityContext: ensureDefaultPodSecurityContext(nil), + Volumes: r.ensureDefaultVolumes(nil), + SecurityContext: r.ensureDefaultPodSecurityContext(nil), }, }, TTLSecondsAfterFinished: lo.ToPtr(int32(60 * 60)), diff --git a/internal/controller/stackrunjob_secret.go b/internal/controller/stackrunjob_secret.go new file mode 100644 index 000000000..bcc768d23 --- /dev/null +++ b/internal/controller/stackrunjob_secret.go @@ -0,0 +1,66 @@ +package controller + +import ( + "context" + + "github.com/pluralsh/deployment-operator/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + apierrs 
"k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +const ( + envStackRunID = "PLRL_STACK_RUN_ID" +) + +func (r *StackRunJobReconciler) getRunSecretData(runID string) map[string]string { + return map[string]string{ + envConsoleURL: r.ConsoleURL, + envConsoleToken: r.DeployToken, + envStackRunID: runID, + } +} + +func (r *StackRunJobReconciler) hasRunSecretData(data map[string][]byte, runID string) bool { + token, hasToken := data[envConsoleToken] + url, hasUrl := data[envConsoleURL] + id, hasID := data[envStackRunID] + return hasToken && hasUrl && hasID && + string(token) == r.DeployToken && string(url) == r.ConsoleURL && string(id) == runID +} + +func (r *StackRunJobReconciler) reconcileSecret(ctx context.Context, run *v1alpha1.StackRunJob) (*corev1.Secret, error) { + logger := log.FromContext(ctx) + + secret := &corev1.Secret{} + if err := r.Get(ctx, types.NamespacedName{Name: run.Name, Namespace: run.Namespace}, secret); err != nil { + if !apierrs.IsNotFound(err) { + return nil, err + } + + secret = &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: run.Name, Namespace: run.Namespace}, + StringData: r.getRunSecretData(run.Spec.RunID), + } + logger.V(2).Info("creating secret", "namespace", secret.Namespace, "name", secret.Name) + if err := r.Create(ctx, secret); err != nil { + logger.Error(err, "unable to create secret") + return nil, err + } + + return secret, nil + } + + if !r.hasRunSecretData(secret.Data, run.Spec.RunID) { + logger.V(2).Info("updating secret", "namespace", secret.Namespace, "name", secret.Name) + secret.StringData = r.getRunSecretData(run.Spec.RunID) + if err := r.Update(ctx, secret); err != nil { + logger.Error(err, "unable to update secret") + return nil, err + } + } + + return secret, nil +} diff --git a/pkg/common/job.go b/pkg/common/job.go new file mode 100644 index 000000000..bea37212c --- /dev/null +++ 
b/pkg/common/job.go @@ -0,0 +1,43 @@ +package common + +import ( + console "github.com/pluralsh/console/go/client" + consoleclient "github.com/pluralsh/deployment-operator/pkg/client" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func GetRunJobSpec(name string, jobSpecFragment *console.JobSpecFragment) *batchv1.JobSpec { + if jobSpecFragment == nil { + return nil + } + var jobSpec *batchv1.JobSpec + var err error + if jobSpecFragment.Raw != nil && *jobSpecFragment.Raw != "null" { + jobSpec, err = consoleclient.JobSpecFromYaml(*jobSpecFragment.Raw) + if err != nil { + return nil + } + } else { + jobSpec = &batchv1.JobSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: jobSpecFragment.Namespace, + Labels: consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Labels), + Annotations: consoleclient.StringMapFromInterfaceMap(jobSpecFragment.Annotations), + }, + Spec: corev1.PodSpec{ + Containers: consoleclient.ContainersFromContainerSpecFragments(name, jobSpecFragment.Containers, jobSpecFragment.Requests), + }, + }, + } + + if jobSpecFragment.ServiceAccount != nil { + jobSpec.Template.Spec.ServiceAccountName = *jobSpecFragment.ServiceAccount + } + } + + return jobSpec +} diff --git a/pkg/controller/sentinel/reconciler.go b/pkg/controller/sentinel/reconciler.go index f383960d5..30659a09b 100644 --- a/pkg/controller/sentinel/reconciler.go +++ b/pkg/controller/sentinel/reconciler.go @@ -6,10 +6,16 @@ import ( "time" console "github.com/pluralsh/console/go/client" + "github.com/pluralsh/deployment-operator/api/v1alpha1" "github.com/pluralsh/deployment-operator/pkg/streamline" "github.com/pluralsh/polly/algorithms" "github.com/pluralsh/polly/cache" + "github.com/samber/lo" + batchv1 "k8s.io/api/batch/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + 
"k8s.io/apimachinery/pkg/types" "k8s.io/client-go/util/workqueue" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -18,6 +24,7 @@ import ( clienterrors "github.com/pluralsh/deployment-operator/internal/errors" "github.com/pluralsh/deployment-operator/internal/utils" "github.com/pluralsh/deployment-operator/pkg/client" + pkgcommon "github.com/pluralsh/deployment-operator/pkg/common" "github.com/pluralsh/deployment-operator/pkg/controller/common" "github.com/pluralsh/deployment-operator/pkg/websocket" ) @@ -150,6 +157,56 @@ func (r *SentinelReconciler) Reconcile(ctx context.Context, id string) (reconcil if run.Status != console.SentinelRunJobStatusPending { return reconcile.Result{}, nil } - _, err = r.reconcileRunJob(ctx, run) - return reconcile.Result{}, err + + return reconcile.Result{}, r.reconcileSentinelRunJobCR(ctx, run) +} + +func (r *SentinelReconciler) reconcileSentinelRunJobCR(ctx context.Context, run *console.SentinelRunJobFragment) error { + logger := log.FromContext(ctx) + + name := GetRunResourceName(run) + namespace := r.GetRunResourceNamespace(pkgcommon.GetRunJobSpec(name, run.JobSpec)) + + if err := r.namespaceCache.EnsureNamespace(ctx, namespace, &console.ServiceDeploymentForAgent_SyncConfig{ + CreateNamespace: lo.ToPtr(true), + }); err != nil { + return err + } + cr := &v1alpha1.SentinelRunJob{} + if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cr); err != nil { + if !apierrs.IsNotFound(err) { + return err + } + cr = &v1alpha1.SentinelRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: v1alpha1.SentinelRunJobSpec{ + RunID: run.ID, + }, + } + + logger.Info("creating SentinelRunJob CR", "name", name, "namespace", namespace, "runID", run.ID) + return r.k8sClient.Create(ctx, cr) + } + return nil +} + +// GetRunResourceName returns a resource name used for a job and a secret connected to a given run. 
+func GetRunResourceName(run *console.SentinelRunJobFragment) string { + return fmt.Sprintf("sentinel-%s", run.ID) +} + +// GetRunResourceNamespace returns a resource namespace used for a job and a secret connected to a given run. +func (r *SentinelReconciler) GetRunResourceNamespace(jobSpec *batchv1.JobSpec) (namespace string) { + if jobSpec != nil { + namespace = jobSpec.Template.Namespace + } + + if namespace == "" { + namespace = r.namespace + } + + return } diff --git a/pkg/controller/stacks/reconciler.go b/pkg/controller/stacks/reconciler.go index fccf4c452..8f207182c 100644 --- a/pkg/controller/stacks/reconciler.go +++ b/pkg/controller/stacks/reconciler.go @@ -5,8 +5,12 @@ import ( "fmt" "time" - configuration "github.com/pluralsh/deployment-operator/pkg/common" + "github.com/pluralsh/deployment-operator/api/v1alpha1" "github.com/pluralsh/polly/cache" + batchv1 "k8s.io/api/batch/v1" + apierrs "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" console "github.com/pluralsh/console/go/client" "github.com/pluralsh/polly/algorithms" @@ -19,6 +23,7 @@ import ( clienterrors "github.com/pluralsh/deployment-operator/internal/errors" "github.com/pluralsh/deployment-operator/internal/utils" "github.com/pluralsh/deployment-operator/pkg/client" + pkgcommon "github.com/pluralsh/deployment-operator/pkg/common" "github.com/pluralsh/deployment-operator/pkg/controller/common" "github.com/pluralsh/deployment-operator/pkg/websocket" ) @@ -75,7 +80,7 @@ func (r *StackReconciler) Shutdown() { func (r *StackReconciler) GetPollInterval() func() time.Duration { return func() time.Duration { - if stackPollInterval := configuration.GetConfigurationManager().GetStackPollInterval(); stackPollInterval != nil { + if stackPollInterval := pkgcommon.GetConfigurationManager().GetStackPollInterval(); stackPollInterval != nil { return *stackPollInterval } return r.pollInterval @@ -153,6 +158,49 @@ func (r *StackReconciler) 
Reconcile(ctx context.Context, id string) (reconcile.R if stackRun.Status != console.StackStatusPending { return reconcile.Result{}, nil } - _, err = r.reconcileRunJob(ctx, stackRun) - return reconcile.Result{}, err + + return reconcile.Result{}, r.reconcileStackRunJobCR(ctx, stackRun) +} + +func (r *StackReconciler) reconcileStackRunJobCR(ctx context.Context, run *console.StackRunMinimalFragment) error { + logger := log.FromContext(ctx) + name := GetRunResourceName(run) + namespace := r.GetRunResourceNamespace(pkgcommon.GetRunJobSpec(name, run.JobSpec)) + cr := &v1alpha1.StackRunJob{} + if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, cr); err != nil { + if !apierrs.IsNotFound(err) { + return err + } + cr = &v1alpha1.StackRunJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: v1alpha1.StackRunJobSpec{ + RunID: run.ID, + }, + } + + logger.Info("creating StackRunJob CR", "name", name, "namespace", namespace, "runID", run.ID) + return r.k8sClient.Create(ctx, cr) + } + return nil +} + +// GetRunResourceName returns a resource name used for a job and a secret connected to a given run. +func GetRunResourceName(run *console.StackRunMinimalFragment) string { + return fmt.Sprintf("stack-%s", run.ID) +} + +// GetRunResourceNamespace returns a resource namespace used for a job and a secret connected to a given run. +func (r *StackReconciler) GetRunResourceNamespace(jobSpec *batchv1.JobSpec) (namespace string) { + if jobSpec != nil { + namespace = jobSpec.Template.Namespace + } + + if namespace == "" { + namespace = r.namespace + } + + return } diff --git a/pkg/controller/stacks/reconciler_test.go b/pkg/controller/stacks/reconciler_test.go index c457ebae0..9a5abc966 100644 --- a/pkg/controller/stacks/reconciler_test.go +++ b/pkg/controller/stacks/reconciler_test.go @@ -7,8 +7,9 @@ import ( "github.com/Yamashou/gqlgenc/clientv2" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" - console "github.com/pluralsh/console/go/client" - "github.com/samber/lo" + errors2 "github.com/pluralsh/deployment-operator/internal/errors" + "github.com/pluralsh/deployment-operator/pkg/controller/stacks" + "github.com/pluralsh/deployment-operator/pkg/test/mocks" "github.com/stretchr/testify/mock" "github.com/vektah/gqlparser/v2/gqlerror" batchv1 "k8s.io/api/batch/v1" @@ -17,11 +18,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/yaml" - - errors2 "github.com/pluralsh/deployment-operator/internal/errors" - "github.com/pluralsh/deployment-operator/pkg/controller/stacks" - "github.com/pluralsh/deployment-operator/pkg/test/mocks" ) var _ = Describe("Reconciler", Ordered, func() { @@ -48,7 +44,7 @@ var _ = Describe("Reconciler", Ordered, func() { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ Containers: []corev1.Container{{ - Name: stacks.DefaultJobContainer, + Name: "default", Image: "image:v1.0.0", Args: []string{}, }}, @@ -93,140 +89,5 @@ var _ = Describe("Reconciler", Ordered, func() { Expect(err.Error()).To(ContainSubstring("unknown error")) }) - It("should exit without errors as job is already created", func() { - fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(&console.StackRunMinimalFragment{ - ID: stackRunId, - Approval: lo.ToPtr(false), - Status: console.StackStatusPending, - }, nil) - - reconciler := stacks.NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") - - _, err := reconciler.Reconcile(ctx, stackRunId) - Expect(err).NotTo(HaveOccurred()) - }) - - It("should create new job with default values", func() { - stackRunId := "default-values" - stackRun := &console.StackRunMinimalFragment{ - ID: stackRunId, - Approval: lo.ToPtr(false), - Status: console.StackStatusPending, - } - - fakeConsoleClient := 
mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(stackRun, nil) - fakeConsoleClient.On("UpdateStackRun", mock.Anything, mock.Anything).Return(nil) - - reconciler := stacks.NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") - - _, err := reconciler.Reconcile(ctx, stackRunId) - Expect(err).NotTo(HaveOccurred()) - - job := &batchv1.Job{} - Expect(kClient.Get(ctx, types.NamespacedName{Name: stacks.GetRunResourceName(stackRun), Namespace: namespace}, job)).NotTo(HaveOccurred()) - Expect(*job.Spec.BackoffLimit).To(Equal(int32(0))) - Expect(job.Spec.Template.Spec.Containers).To(HaveLen(1)) - Expect(job.Spec.Template.Spec.Volumes).To(HaveLen(2)) - Expect(kClient.Delete(ctx, job)).To(Succeed()) - }) - - It("should create new job based on user-defined spec", func() { - labelsValue := "labels-123" - annotationsValue := "annotations-123" - stackRunId := "user-defined-spec" - stackRun := &console.StackRunMinimalFragment{ - ID: stackRunId, - JobSpec: &console.JobSpecFragment{ - Namespace: namespace, - Containers: []*console.ContainerSpecFragment{{ - Image: "test", - Args: []*string{lo.ToPtr("arg1"), lo.ToPtr("arg2")}, - }, { - Image: "test2", - Args: []*string{lo.ToPtr("arg1")}, - }}, - Labels: map[string]any{"test": labelsValue}, - Annotations: map[string]any{"test": annotationsValue}, - ServiceAccount: lo.ToPtr("test-sa"), - }, - Status: console.StackStatusPending, - } - - fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(stackRun, nil) - fakeConsoleClient.On("UpdateStackRun", mock.Anything, mock.Anything).Return(nil) - - reconciler := stacks.NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") - - _, err := reconciler.Reconcile(ctx, stackRunId) - Expect(err).NotTo(HaveOccurred()) - - job := &batchv1.Job{} - Expect(kClient.Get(ctx, types.NamespacedName{Name: 
stacks.GetRunResourceName(stackRun), Namespace: namespace}, job)).NotTo(HaveOccurred()) - Expect(*job.Spec.BackoffLimit).To(Equal(int32(0))) - Expect(job.Spec.Template.Spec.Containers).To(HaveLen(3)) - Expect(job.Spec.Template.ObjectMeta.Labels).To(ContainElement(labelsValue)) - Expect(job.Spec.Template.ObjectMeta.Annotations).To(ContainElement(annotationsValue)) - Expect(job.Spec.Template.Spec.ServiceAccountName).To(Equal(*stackRun.JobSpec.ServiceAccount)) - Expect(job.Spec.Template.Spec.Volumes).To(HaveLen(2)) - Expect(kClient.Delete(ctx, job)).To(Succeed()) - }) - - It("should create new job based on user-defined raw spec", func() { - jobSpec := batchv1.JobSpec{ - ActiveDeadlineSeconds: lo.ToPtr(int64(60)), - BackoffLimit: lo.ToPtr(int32(3)), - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{}, - Spec: corev1.PodSpec{ - Volumes: []corev1.Volume{ - { - Name: "test", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - }, - }, - Containers: []corev1.Container{{ - Name: stacks.DefaultJobContainer, - Image: "image:v1.0.0", - }}, - ServiceAccountName: "test-sa", - }, - }, - } - marshalledJobSpec, err := yaml.Marshal(jobSpec) - Expect(err).NotTo(HaveOccurred()) - - stackRunId := "user-defined-raw-spec" - stackRun := &console.StackRunMinimalFragment{ - ID: stackRunId, - JobSpec: &console.JobSpecFragment{ - Namespace: "", - Raw: lo.ToPtr(string(marshalledJobSpec)), - }, - Status: console.StackStatusPending, - } - - fakeConsoleClient := mocks.NewClientMock(mocks.TestingT) - fakeConsoleClient.On("GetStackRun", mock.Anything).Return(stackRun, nil) - fakeConsoleClient.On("UpdateStackRun", mock.Anything, mock.Anything).Return(nil) - - reconciler := stacks.NewStackReconciler(fakeConsoleClient, kClient, scheme.Scheme, time.Minute, 0, namespace, "", "") - - _, err = reconciler.Reconcile(ctx, stackRunId) - Expect(err).NotTo(HaveOccurred()) - - job := &batchv1.Job{} - Expect(kClient.Get(ctx, types.NamespacedName{Name: 
stacks.GetRunResourceName(stackRun), Namespace: namespace}, job)).NotTo(HaveOccurred()) - Expect(*job.Spec.ActiveDeadlineSeconds).To(Equal(*jobSpec.ActiveDeadlineSeconds)) - Expect(*job.Spec.BackoffLimit).To(Equal(int32(0))) // Overridden by controller. - Expect(job.Spec.Template.Spec.ServiceAccountName).To(Equal(jobSpec.Template.Spec.ServiceAccountName)) - Expect(job.Spec.Template.Spec.Containers).To(HaveLen(1)) // Merged by controller as default container was specified. - Expect(job.Spec.Template.Spec.Volumes).To(HaveLen(3)) - Expect(kClient.Delete(ctx, job)).To(Succeed()) - }) }) }) diff --git a/pkg/controller/stacks/secret.go b/pkg/controller/stacks/secret.go deleted file mode 100644 index 1c8174627..000000000 --- a/pkg/controller/stacks/secret.go +++ /dev/null @@ -1,67 +0,0 @@ -package stacks - -import ( - "context" - - corev1 "k8s.io/api/core/v1" - apierrs "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/log" -) - -const ( - envConsoleURL = "PLRL_CONSOLE_URL" - envConsoleToken = "PLRL_CONSOLE_TOKEN" - envStackRunID = "PLRL_STACK_RUN_ID" -) - -func (r *StackReconciler) getRunSecretData(runID string) map[string]string { - return map[string]string{ - envConsoleURL: r.consoleURL, - envConsoleToken: r.deployToken, - envStackRunID: runID, - } -} - -func (r *StackReconciler) hasRunSecretData(data map[string][]byte, runID string) bool { - token, hasToken := data[envConsoleToken] - url, hasUrl := data[envConsoleURL] - id, hasID := data[envConsoleURL] - return hasToken && hasUrl && hasID && - string(token) == r.deployToken && string(url) == r.consoleURL && string(id) == runID -} - -func (r *StackReconciler) upsertRunSecret(ctx context.Context, name, namespace, runID string) (*corev1.Secret, error) { - logger := log.FromContext(ctx) - - secret := &corev1.Secret{} - if err := r.k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, secret); err 
!= nil { - if !apierrs.IsNotFound(err) { - return nil, err - } - - secret = &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace}, - StringData: r.getRunSecretData(runID), - } - logger.V(2).Info("creating secret", "namespace", secret.Namespace, "name", secret.Name) - if err := r.k8sClient.Create(ctx, secret); err != nil { - logger.Error(err, "unable to create secret") - return nil, err - } - - return secret, nil - } - - if !r.hasRunSecretData(secret.Data, runID) { - logger.V(2).Info("updating secret", "namespace", secret.Namespace, "name", secret.Name) - secret.StringData = r.getRunSecretData(runID) - if err := r.k8sClient.Update(ctx, secret); err != nil { - logger.Error(err, "unable to update secret") - return nil, err - } - } - - return secret, nil -} diff --git a/test/mixed/kustomize/liquid/dev/kustomization.yaml b/test/mixed/kustomize/liquid/dev/kustomization.yaml index ef2c014f0..2b01f31ff 100644 --- a/test/mixed/kustomize/liquid/dev/kustomization.yaml +++ b/test/mixed/kustomize/liquid/dev/kustomization.yaml @@ -11,7 +11,7 @@ nameSuffix: -dev configMapGenerator: - literals: - username=demo-user - name: + name: nginx secretGenerator: - literals: diff --git a/test/mixed/raw/pod.yaml b/test/mixed/raw/pod.yaml index 2086ffbe0..efe5bd384 100644 --- a/test/mixed/raw/pod.yaml +++ b/test/mixed/raw/pod.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: Pod metadata: - name: + name: nginx spec: containers: - name: nginx