From 9ac03b8d6f78bd4f6b06dbf9bc2c1dffad4d0184 Mon Sep 17 00:00:00 2001 From: Cyril Galibern Date: Thu, 15 Jan 2026 18:51:06 +0100 Subject: [PATCH] Fix sd-watchdog ping logic to ensure earlier ping once re-enabled With this commit, whenever pingEnabled changes, we force a new sdNotify to ensure a new starting point. Example of the previous issue, with sd watchdog 20s and pubsub timeout 11s: Jan 15 11:04:47.534780 node1 om[421482]: daemon: main: sd-watchdog: disable on missing pubsub heathcheck Jan 15 11:04:59.939026 node1 om[421482]: daemon: main: sd-watchdog: enable on pubsub heathcheck // a ping should have been sent here Jan 15 11:05:02.689806 node1 systemd[1]: opensvc-server.service: Watchdog timeout (limit 20s)! --- daemon/daemon/main.go | 52 +++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/daemon/daemon/main.go b/daemon/daemon/main.go index 0994c79c0..c205050ce 100644 --- a/daemon/daemon/main.go +++ b/daemon/daemon/main.go @@ -427,6 +427,16 @@ func (t *T) sdWatchDog(ctx context.Context, interval, pubsubTimeout time.Duratio }() } + sdNotify := func() { + if ok, err := sd.NotifyWatchdog(); err != nil { + t.log.Warnf("sd-watchdog: %s", err) + } else if !ok { + t.log.Tracef("sd-watchdog: delivery not needed") + } else { + t.log.Debugf("sd-watchdog: delivered") + } + } + sub := t.bus.Sub("sd-watchdog") sub.AddFilter(&msgbus.WatchDog{}, pubsub.Label{"node", hostname.Hostname()}) sub.Start() @@ -437,42 +447,36 @@ func (t *T) sdWatchDog(ctx context.Context, interval, pubsubTimeout time.Duratio } }() }() - ticker := time.NewTicker(interval) - defer ticker.Stop() + pingTicker := time.NewTicker(interval) + defer pingTicker.Stop() - tickerPubsub := time.NewTicker(pubsubTimeout) - defer tickerPubsub.Stop() + pubsubTicker := time.NewTicker(pubsubTimeout) + defer pubsubTicker.Stop() - var disabled bool + pingEnabled := true t.log.Tracef("sd-watchdog: started") defer t.log.Tracef("sd-watchdog: stopped") for { select { case 
<-ctx.Done(): return - case <-ticker.C: - if disabled { - continue - } - if ok, err := sd.NotifyWatchdog(); err != nil { - t.log.Warnf("sd-watchdog: %s", err) - } else if !ok { - t.log.Tracef("sd-watchdog: delivery not needed") - } else { - t.log.Debugf("sd-watchdog: delivered") + case <-pingTicker.C: + if pingEnabled { + sdNotify() } - case <-tickerPubsub.C: - if disabled { - continue + case <-pubsubTicker.C: + if pingEnabled { + pingEnabled = false + t.log.Infof("sd-watchdog: suspend ping (pubsub is stale)") + sdNotify() } - disabled = true - t.log.Infof("sd-watchdog: disable on missing pubsub heathcheck") case <-sub.C: - if disabled { - t.log.Infof("sd-watchdog: enable on pubsub heathcheck") + if !pingEnabled { + pingEnabled = true + t.log.Infof("sd-watchdog: resume ping (pubsub is alive)") + sdNotify() } - disabled = false - tickerPubsub.Reset(pubsubTimeout) + pubsubTicker.Reset(pubsubTimeout) } } }