@@ -670,8 +670,8 @@ func (self *KubeClient) GetSimpleHealthStatus(ctx context.Context, namespace str
670670 hasCrashing := false
671671 hasPending := false
672672 hasTerminating := false
673- readyCount := 0
674- allInstances := make ([]SimpleInstanceStatus , 0 )
673+ readyPodCount := 0
674+ allInstances := make ([]SimpleInstanceStatus , 0 , len ( podStatuses ) )
675675
676676 for _ , podStatus := range podStatuses {
677677 // Check if pod itself is terminating
@@ -684,75 +684,138 @@ func (self *KubeClient) GetSimpleHealthStatus(ctx context.Context, namespace str
684684 hasCrashing = true
685685 }
686686
687+ // For grouping logic - track pod-level health
688+ podHasCrashing := false
689+ podHasPending := false
690+ podHasTerminating := false
691+ podReadyContainers := 0
692+ podTotalMainContainers := len (podStatus .Instances )
693+ var podState ContainerState
694+ var podEvents []models.EventRecord
695+ var maxRestartCount int32
696+
697+ // Process main containers
687698 for _ , instance := range podStatus .Instances {
688- allInstances = append ( allInstances , SimpleInstanceStatus {
689- KubernetesName : instance .KubernetesName ,
690- Status : instance . State ,
691- RestartCount : instance . RestartCount ,
692- PodCreatedAt : instance .PodCreatedAt ,
693- Events : instance .Events ,
694- })
699+ // Collect all events from all containers in this pod
700+ podEvents = append ( podEvents , instance .Events ... )
701+
702+ // Track highest restart count
703+ if instance .RestartCount > maxRestartCount {
704+ maxRestartCount = instance .RestartCount
705+ }
695706
696- // Count ready instances and detect pending/crashing/terminating states
707+ // Track pod-level states
697708 switch instance .State {
698709 case ContainerStateCrashing :
710+ podHasCrashing = true
699711 hasCrashing = true
712+ podState = ContainerStateCrashing // Crashing takes precedence
700713 case ContainerStateTerminating :
714+ podHasTerminating = true
701715 hasTerminating = true
716+ if podState != ContainerStateCrashing {
717+ podState = ContainerStateTerminating
718+ }
702719 case ContainerStateRunning :
703720 if instance .Ready {
704- readyCount ++
721+ podReadyContainers ++
705722 } else {
723+ podHasPending = true
706724 hasPending = true
725+ if podState != ContainerStateCrashing && podState != ContainerStateTerminating {
726+ podState = ContainerStateNotReady
727+ }
707728 }
708729 case ContainerStateNotReady , ContainerStateWaiting , ContainerStateStarting , ContainerStateImagePullError :
730+ podHasPending = true
709731 hasPending = true
732+ if podState != ContainerStateCrashing && podState != ContainerStateTerminating {
733+ podState = instance .State
734+ }
710735 case ContainerStateTerminated :
711736 // Terminated containers might be crashing if they have restart counts or failed
712737 if instance .IsCrashing {
738+ podHasCrashing = true
713739 hasCrashing = true
740+ podState = ContainerStateCrashing
741+ } else if podState == "" {
742+ podState = ContainerStateTerminated
714743 }
715744 }
716745 }
717746
718- // Also check init containers
747+ // Process init containers but filter out terminated ones
719748 for _ , instance := range podStatus .InstanceDependencies {
720- allInstances = append (allInstances , SimpleInstanceStatus {
721- KubernetesName : instance .KubernetesName ,
722- Status : instance .State ,
723- RestartCount : instance .RestartCount ,
724- PodCreatedAt : instance .PodCreatedAt ,
725- Events : instance .Events ,
726- })
749+ // Skip terminated init containers as they're expected to be terminated after successful completion
750+ if instance .State == ContainerStateTerminated && ! instance .IsCrashing {
751+ continue
752+ }
753+
754+ // Collect events from init containers that are still relevant
755+ podEvents = append (podEvents , instance .Events ... )
756+
757+ // Track highest restart count including init containers
758+ if instance .RestartCount > maxRestartCount {
759+ maxRestartCount = instance .RestartCount
760+ }
727761
728762 // Init containers failing can affect overall health
729763 switch instance .State {
730764 case ContainerStateCrashing :
765+ podHasCrashing = true
731766 hasCrashing = true
767+ podState = ContainerStateCrashing
732768 case ContainerStateTerminating :
769+ podHasTerminating = true
733770 hasTerminating = true
771+ if podState != ContainerStateCrashing {
772+ podState = ContainerStateTerminating
773+ }
734774 case ContainerStateWaiting , ContainerStateStarting , ContainerStateImagePullError :
775+ podHasPending = true
735776 hasPending = true
777+ if podState != ContainerStateCrashing && podState != ContainerStateTerminating {
778+ podState = instance .State
779+ }
736780 case ContainerStateTerminated :
737781 if instance .IsCrashing {
782+ podHasCrashing = true
738783 hasCrashing = true
784+ podState = ContainerStateCrashing
739785 }
740786 }
741787 }
788+
789+ // Determine final pod state - if all main containers are ready and running, pod is running
790+ if ! podHasCrashing && ! podHasTerminating && ! podHasPending && podReadyContainers == podTotalMainContainers && podTotalMainContainers > 0 {
791+ podState = ContainerStateRunning
792+ readyPodCount ++
793+ }
794+
795+ // Create a single SimpleInstanceStatus representing the entire pod
796+ podInstanceStatus := SimpleInstanceStatus {
797+ KubernetesName : podStatus .KubernetesName , // Use pod name instead of container name
798+ Status : podState ,
799+ RestartCount : maxRestartCount , // Use highest restart count from all containers
800+ PodCreatedAt : podStatus .CreatedAt ,
801+ Events : podEvents , // Combine events from all containers
802+ }
803+
804+ allInstances = append (allInstances , podInstanceStatus )
742805 }
743806
744807 // Determine health status based on priority:
745808 // 1. Crashing takes precedence over everything (indicates real problems)
746809 // 2. Terminating comes next (planned shutdown/scaling)
747- // 3. Pending if any containers are not ready or we don't have enough instances
748- // 4. Active only if all expected instances are ready and running
810+ // 3. Pending if any containers are not ready or we don't have enough pod replicas
811+ // 4. Active only if all expected pod replicas are ready and running
749812 var health InstanceHealth
750813 switch {
751814 case hasCrashing :
752815 health = InstanceHealthCrashing
753816 case hasTerminating :
754817 health = InstanceHealthTerminating
755- case hasPending || readyCount < expectedInstances :
818+ case hasPending || readyPodCount < expectedInstances :
756819 health = InstanceHealthPending
757820 default :
758821 health = InstanceHealthActive
0 commit comments