From 373ef731feaa851059e8f5f47eb714aa3596f0e4 Mon Sep 17 00:00:00 2001 From: Jeny Sadadia Date: Thu, 31 Jul 2025 10:58:09 +0530 Subject: [PATCH] Fix re-subscription logic in pipeline services When API helper functions `receive_event_node` or `receive_event_data` fails, there may be some issue with extracting node data from the event rather than an issue with the subscription. Check exception message before resubscribing. Fixes: f04f6d0 ("(events): Add re-subscribe mechanism") Signed-off-by: Jeny Sadadia --- src/monitor.py | 16 +++++++++------- src/scheduler.py | 16 +++++++++------- src/send_kcidb.py | 16 +++++++++------- src/tarball.py | 19 ++++++++++--------- 4 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/monitor.py b/src/monitor.py index c492bc630..52bf8b5bc 100755 --- a/src/monitor.py +++ b/src/monitor.py @@ -59,13 +59,15 @@ def _run(self, sub_id): try: event = self._api.receive_event(sub_id) except Exception as e: - self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds") - time.sleep(10) - sub_id = self._api.subscribe('node') - subscribe_retries += 1 - if subscribe_retries > 3: - self.log.error("Failed to re-subscribe to node events") - return False + self.log.error(f"Error receiving event: {e}") + if "404 Client Error" in str(e): + self.log.error(f"Error receiving event: {e}. Re-subscribing...") + sub_id = self._setup(None) + subscribe_retries += 1 + if subscribe_retries > 3: + self.log.error("Failed to re-subscribe to node events") + return False + continue continue subscribe_retries = 0 obj = event.data diff --git a/src/scheduler.py b/src/scheduler.py index 099b45b1d..a53b83381 100755 --- a/src/scheduler.py +++ b/src/scheduler.py @@ -382,13 +382,15 @@ def _run(self, sub_id): try: event = self._api_helper.receive_event_data(sub_id, block=False) except Exception as e: - self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds") - time.sleep(10) - sub_id = self._api.subscribe('node') - subscribe_retries += 1 - if subscribe_retries > 3: - self.log.error("Failed to re-subscribe to node events") - return False + self.log.error(f"Error receiving event: {e}") + if "404 Client Error" in str(e): + self.log.error(f"Error receiving event: {e}. Re-subscribing...") + sub_id = self._setup(None) + subscribe_retries += 1 + if subscribe_retries > 3: + self.log.error("Failed to re-subscribe to node events") + return False + continue continue if not event: # If we received a keep-alive event, just continue diff --git a/src/send_kcidb.py b/src/send_kcidb.py index a58839707..b242a99d3 100755 --- a/src/send_kcidb.py +++ b/src/send_kcidb.py @@ -702,13 +702,15 @@ def _run(self, context): try: node, is_hierarchy = self._api_helper.receive_event_node(context['sub_id']) except Exception as e: - self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds") - time.sleep(10) - context['sub_id'] = self._api_helper.subscribe_filters(self._filters, promiscuous=True) - subscribe_retries += 1 - if subscribe_retries > 3: - self.log.error("Failed to re-subscribe to node events") - return False + self.log.error(f"Error receiving event: {e}") + if "404 Client Error" in str(e): + self.log.error(f"Error receiving event: {e}. Re-subscribing...") + context['sub_id'] = self._api_helper.subscribe_filters(self._filters, promiscuous=True) + subscribe_retries += 1 + if subscribe_retries > 3: + self.log.error("Failed to re-subscribe to node events") + return False + continue continue subscribe_retries = 0 self.log.info(f"Processing event node: {node['id']}") diff --git a/src/tarball.py b/src/tarball.py index 8edd6fd34..d03751e18 100755 --- a/src/tarball.py +++ b/src/tarball.py @@ -215,17 +215,18 @@ def _run(self, sub_id): try: checkout_node, _ = self._api_helper.receive_event_node(sub_id) except Exception as e: - self.log.error(f"Error receiving event: {e}, re-subscribing in 10 seconds") - time.sleep(10) - # try to resubscribe - sub_id = self._api_helper.subscribe_filters(self._filters) - subscribe_retries += 1 - if subscribe_retries > 3: - self.log.error("Failed to re-subscribe to checkout events") - return False + self.log.error(f"Error receiving event: {e}") + if "404 Client Error" in str(e): + self.log.error(f"Error receiving event: {e}. Re-subscribing...") + sub_id = self._setup(None) + subscribe_retries += 1 + if subscribe_retries > 3: + self.log.error("Failed to re-subscribe to checkout events") + return False + continue continue - subscribe_retries = 0 + subscribe_retries = 0 build_config = self._find_build_config(checkout_node) if build_config is None: continue