diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go b/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go
index 699ef35652..b95e5295ad 100644
--- a/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go
+++ b/images/virtualization-artifact/pkg/controller/vm/internal/lifecycle.go
@@ -23,6 +23,8 @@ import (
 
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/labels"
 	virtv1 "kubevirt.io/api/core/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
@@ -101,7 +103,7 @@ func (h *LifeCycleHandler) Handle(ctx context.Context, s state.VirtualMachineSta
 
 	log := logger.FromContext(ctx).With(logger.SlogHandler(nameLifeCycleHandler))
 
-	h.syncRunning(changed, kvvm, kvvmi, pod, log)
+	h.syncRunning(ctx, changed, kvvm, kvvmi, pod, log)
 	return reconcile.Result{}, nil
 }
 
@@ -109,7 +111,7 @@ func (h *LifeCycleHandler) Name() string {
 	return nameLifeCycleHandler
 }
 
-func (h *LifeCycleHandler) syncRunning(vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine, kvvmi *virtv1.VirtualMachineInstance, pod *corev1.Pod, log *slog.Logger) {
+func (h *LifeCycleHandler) syncRunning(ctx context.Context, vm *v1alpha2.VirtualMachine, kvvm *virtv1.VirtualMachine, kvvmi *virtv1.VirtualMachineInstance, pod *corev1.Pod, log *slog.Logger) {
 	cb := conditions.NewConditionBuilder(vmcondition.TypeRunning).Generation(vm.GetGeneration())
 
 	if pod != nil && pod.Status.Message != "" {
@@ -120,6 +122,14 @@ func (h *LifeCycleHandler) syncRunning(vm *v1alpha2.VirtualMachine, kvvm *virtv1
 		return
 	}
 
+	if volumeError := h.checkPodVolumeErrors(ctx, vm, log); volumeError != nil {
+		cb.Status(metav1.ConditionFalse).
+			Reason(vmcondition.ReasonPodNotStarted).
+			Message(volumeError.Error())
+		conditions.SetCondition(cb, &vm.Status.Conditions)
+		return
+	}
+
 	if kvvm != nil {
 		podScheduled := service.GetKVVMCondition(string(corev1.PodScheduled), kvvm.Status.Conditions)
 		if podScheduled != nil && podScheduled.Status == corev1.ConditionFalse {
@@ -202,3 +212,86 @@ func (h *LifeCycleHandler) syncRunning(vm *v1alpha2.VirtualMachine, kvvm *virtv1
 	cb.Reason(vmcondition.ReasonVmIsNotRunning).Status(metav1.ConditionFalse)
 	conditions.SetCondition(cb, &vm.Status.Conditions)
 }
+
+// checkPodVolumeErrors reports volume attach or mount failures of the pods
+// labelled with the VM's name.
+//
+// Only pods that are Pending and have a container waiting with reason
+// ContainerCreating are inspected. Among the Warning events with reason
+// FailedAttachVolume or FailedMount that refer to such a pod, the one with the
+// latest LastTimestamp is returned as an error of the form "reason: message".
+// Picking the latest event keeps the result stable: the order of listed events
+// is not guaranteed, and returning an arbitrary match would make the condition
+// message flap between reconciles.
+//
+// The check is best-effort: if listing pods or events fails, the failure is
+// logged and treated as "no volume error".
+func (h *LifeCycleHandler) checkPodVolumeErrors(ctx context.Context, vm *v1alpha2.VirtualMachine, log *slog.Logger) error {
+	var podList corev1.PodList
+	err := h.client.List(ctx, &podList, &client.ListOptions{
+		Namespace: vm.Namespace,
+		LabelSelector: labels.SelectorFromSet(map[string]string{
+			virtv1.VirtualMachineNameLabel: vm.Name,
+		}),
+	})
+	if err != nil {
+		log.Error("Failed to list pods", "error", err)
+		return nil
+	}
+
+	// Iterate by index: corev1.Pod is large and does not need to be copied.
+	for i := range podList.Items {
+		pod := &podList.Items[i]
+		if pod.Status.Phase != corev1.PodPending {
+			continue
+		}
+
+		hasContainerCreating := false
+		for _, cs := range pod.Status.ContainerStatuses {
+			if cs.State.Waiting != nil && cs.State.Waiting.Reason == "ContainerCreating" {
+				hasContainerCreating = true
+				break
+			}
+		}
+		if !hasContainerCreating {
+			continue
+		}
+
+		var eventList corev1.EventList
+		listErr := h.client.List(ctx, &eventList, &client.ListOptions{
+			Namespace: pod.Namespace,
+			FieldSelector: fields.SelectorFromSet(fields.Set{
+				"involvedObject.name": pod.Name,
+				"involvedObject.kind": "Pod",
+			}),
+		})
+		if listErr != nil {
+			log.Error("Failed to list pod events", "error", listErr)
+			continue
+		}
+
+		var latest *corev1.Event
+		for j := range eventList.Items {
+			event := &eventList.Items[j]
+			if event.Type != corev1.EventTypeWarning {
+				continue
+			}
+			if event.Reason != "FailedAttachVolume" && event.Reason != "FailedMount" {
+				continue
+			}
+			// Events outlive pods and are selected by name only, so skip the
+			// ones that explicitly refer to a different pod instance.
+			if uid := event.InvolvedObject.UID; uid != "" && uid != pod.UID {
+				continue
+			}
+			if latest == nil || latest.LastTimestamp.Before(&event.LastTimestamp) {
+				latest = event
+			}
+		}
+		if latest != nil {
+			return fmt.Errorf("%s: %s", latest.Reason, latest.Message)
+		}
+	}
+
+	return nil
+}
diff --git
a/images/virtualization-artifact/pkg/controller/vm/internal/network.go b/images/virtualization-artifact/pkg/controller/vm/internal/network.go
index e50ff5b0b0..bcceb918a6 100644
--- a/images/virtualization-artifact/pkg/controller/vm/internal/network.go
+++ b/images/virtualization-artifact/pkg/controller/vm/internal/network.go
@@ -168,7 +168,11 @@ func extractNetworkStatusFromPods(pods *corev1.PodList) (string, error) {
 
 		networkStatusAnnotation, found := pod.Annotations[annotations.AnnNetworksStatus]
 		if !found {
-			errorMessages = append(errorMessages, "Cannot determine the status of additional interfaces, waiting for a response from the SDN module")
+			if pod.Status.Phase == corev1.PodRunning {
+				errorMessages = append(errorMessages, "Cannot determine the status of additional interfaces, waiting for a response from the SDN module")
+			} else {
+				errorMessages = append(errorMessages, "Waiting for virt-launcher pod to start")
+			}
 			continue
 		}
 
diff --git a/images/virtualization-artifact/pkg/controller/vm/internal/watcher/volumeevent_watcher.go b/images/virtualization-artifact/pkg/controller/vm/internal/watcher/volumeevent_watcher.go
new file mode 100644
index 0000000000..79ae2d2f45
--- /dev/null
+++ b/images/virtualization-artifact/pkg/controller/vm/internal/watcher/volumeevent_watcher.go
@@ -0,0 +1,118 @@
+/*
+Copyright 2025 Flant JSC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package watcher
+
+import (
+	"context"
+	"fmt"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+	virtv1 "kubevirt.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/controller-runtime/pkg/source"
+)
+
+// NewVolumeEventWatcher creates a watcher that requests reconciliation of a
+// VirtualMachine when a volume attach or mount failure is reported for one of
+// its pods. The client is used to read the pod an Event refers to.
+func NewVolumeEventWatcher(client client.Client) *VolumeEventWatcher {
+	return &VolumeEventWatcher{
+		client: client,
+	}
+}
+
+// VolumeEventWatcher maps pod volume failure Events to reconcile requests for
+// the VirtualMachine named by the pod's virtv1.VirtualMachineNameLabel label.
+type VolumeEventWatcher struct {
+	client client.Client
+}
+
+// Watch registers a watch on core Events with the controller. Only newly
+// created Events pass the predicate; Event updates and deletions are ignored.
+func (w *VolumeEventWatcher) Watch(mgr manager.Manager, ctr controller.Controller) error {
+	if err := ctr.Watch(
+		source.Kind(
+			mgr.GetCache(),
+			&corev1.Event{},
+			handler.TypedEnqueueRequestsFromMapFunc(w.enqueueRequestsFromEvent),
+			predicate.TypedFuncs[*corev1.Event]{
+				CreateFunc: func(e event.TypedCreateEvent[*corev1.Event]) bool {
+					return isPodVolumeFailure(e.Object)
+				},
+				UpdateFunc: func(e event.TypedUpdateEvent[*corev1.Event]) bool {
+					return false
+				},
+				DeleteFunc: func(e event.TypedDeleteEvent[*corev1.Event]) bool {
+					return false
+				},
+			},
+		),
+	); err != nil {
+		return fmt.Errorf("error setting watch on Event: %w", err)
+	}
+	return nil
+}
+
+// enqueueRequestsFromEvent returns a reconcile request for the VirtualMachine
+// that the pod referenced by e belongs to. It returns nil when e is not a pod
+// volume failure, the pod cannot be read, or the pod has no VirtualMachine
+// name label.
+func (w *VolumeEventWatcher) enqueueRequestsFromEvent(ctx context.Context, e *corev1.Event) []reconcile.Request {
+	if !isPodVolumeFailure(e) {
+		return nil
+	}
+
+	pod := &corev1.Pod{}
+	if err := w.client.Get(ctx, types.NamespacedName{
+		Name:      e.InvolvedObject.Name,
+		Namespace: e.InvolvedObject.Namespace,
+	}, pod); err != nil {
+		// Best-effort: without the pod (for example, it is already deleted)
+		// the VirtualMachine cannot be resolved, so nothing is enqueued.
+		return nil
+	}
+
+	vmName := pod.GetLabels()[virtv1.VirtualMachineNameLabel]
+	if vmName == "" {
+		return nil
+	}
+
+	return []reconcile.Request{
+		{
+			NamespacedName: types.NamespacedName{
+				Name:      vmName,
+				Namespace: pod.GetNamespace(),
+			},
+		},
+	}
+}
+
+// isPodVolumeFailure reports whether e is a Warning Event about a Pod whose
+// volume could not be attached or mounted.
+func isPodVolumeFailure(e *corev1.Event) bool {
+	if e == nil || e.Type != corev1.EventTypeWarning || e.InvolvedObject.Kind != "Pod" {
+		return false
+	}
+	return e.Reason == "FailedAttachVolume" || e.Reason == "FailedMount"
+}
diff --git a/images/virtualization-artifact/pkg/controller/vm/vm_controller.go b/images/virtualization-artifact/pkg/controller/vm/vm_controller.go
index 1cd2ad4433..f86a3a6fe9 100644
--- a/images/virtualization-artifact/pkg/controller/vm/vm_controller.go
+++ b/images/virtualization-artifact/pkg/controller/vm/vm_controller.go
@@ -20,8 +20,11 @@ import (
 	"context"
+	"fmt"
 	"time"
 
+	corev1 "k8s.io/api/core/v1"
 	"k8s.io/utils/ptr"
 	"sigs.k8s.io/controller-runtime/pkg/builder"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller"
 	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/controller-runtime/pkg/metrics"
@@ -43,6 +46,38 @@ const (
 	ControllerName = "vm-controller"
 )
 
+// setupEventIndexes registers the cache field indexes needed to list Events by
+// the name and the kind of the object they refer to.
+func setupEventIndexes(ctx context.Context, mgr manager.Manager) error {
+	if err := indexEventField(ctx, mgr, "involvedObject.name", func(e *corev1.Event) string {
+		return e.InvolvedObject.Name
+	}); err != nil {
+		return err
+	}
+
+	return indexEventField(ctx, mgr, "involvedObject.kind", func(e *corev1.Event) string {
+		return e.InvolvedObject.Kind
+	})
+}
+
+// indexEventField indexes Events in the manager's cache under the given field
+// name, using value to extract the indexed string from an Event.
+func indexEventField(ctx context.Context, mgr manager.Manager, field string, value func(*corev1.Event) string) error {
+	err := mgr.GetFieldIndexer().IndexField(ctx, &corev1.Event{}, field, func(obj client.Object) []string {
+		// The indexer is registered for Events only; a checked assertion keeps
+		// an unexpected object from panicking inside the cache.
+		event, ok := obj.(*corev1.Event)
+		if !ok {
+			return nil
+		}
+		return []string{value(event)}
+	})
+	if err != nil {
+		return fmt.Errorf("index Event field %q: %w", field, err)
+	}
+	return nil
+}
+
 func SetupController(
 	ctx context.Context,
 	mgr manager.Manager,
@@ -50,6 +85,10 @@ func SetupController(
 	dvcrSettings *dvcr.Settings,
 	firmwareImage string,
 ) error {
+	if err := setupEventIndexes(ctx, mgr); err != nil {
+		return err
+	}
+
 	recorder := eventrecord.NewEventRecorderLogger(mgr, ControllerName)
 	mgrCache := mgr.GetCache()
 	client := mgr.GetClient()
diff --git a/images/virtualization-artifact/pkg/controller/vm/vm_reconciler.go b/images/virtualization-artifact/pkg/controller/vm/vm_reconciler.go
index 92d0baaa20..aa5c4488a9 100644
---
a/images/virtualization-artifact/pkg/controller/vm/vm_reconciler.go +++ b/images/virtualization-artifact/pkg/controller/vm/vm_reconciler.go @@ -65,6 +65,7 @@ func (r *Reconciler) SetupController(_ context.Context, mgr manager.Manager, ctr watcher.NewKVVMWatcher(), watcher.NewKVVMIWatcher(), watcher.NewPodWatcher(), + watcher.NewVolumeEventWatcher(mgr.GetClient()), watcher.NewVirtualImageWatcher(mgr.GetClient()), watcher.NewClusterVirtualImageWatcher(mgr.GetClient()), watcher.NewVirtualDiskWatcher(mgr.GetClient()), diff --git a/templates/virtualization-controller/rbac-for-us.yaml b/templates/virtualization-controller/rbac-for-us.yaml index 73acbabd5a..cb742ce14b 100644 --- a/templates/virtualization-controller/rbac-for-us.yaml +++ b/templates/virtualization-controller/rbac-for-us.yaml @@ -101,6 +101,8 @@ rules: verbs: - create - patch + - list + - watch - apiGroups: - cdi.internal.virtualization.deckhouse.io resources: