From 46a94d170708655bf24667fb392a08a0be532cbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Such=C3=A1nek?= Date: Thu, 15 Jan 2026 11:38:42 +0100 Subject: [PATCH] fix(docworker): Fix handling deleted document --- .../dsw/document_worker/exceptions.py | 13 +++++++++- .../dsw/document_worker/worker.py | 26 ++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/packages/dsw-document-worker/dsw/document_worker/exceptions.py b/packages/dsw-document-worker/dsw/document_worker/exceptions.py index 3b7b56b2..1b5095c9 100644 --- a/packages/dsw-document-worker/dsw/document_worker/exceptions.py +++ b/packages/dsw-document-worker/dsw/document_worker/exceptions.py @@ -23,7 +23,18 @@ def db_message(self): f'{str(self.exc)}' -def create_job_exception(job_id: str, message: str, exc=None): +class DocumentNotFoundException(JobException): + pass + + +def create_job_exception(job_id: str, message: str, document_found=True, exc=None): + if not document_found: + return DocumentNotFoundException( + job_id=job_id, + msg=message, + exc=exc, + ) + if isinstance(exc, JobException): return exc diff --git a/packages/dsw-document-worker/dsw/document_worker/worker.py b/packages/dsw-document-worker/dsw/document_worker/worker.py index 95adb253..efa8cc8e 100644 --- a/packages/dsw-document-worker/dsw/document_worker/worker.py +++ b/packages/dsw-document-worker/dsw/document_worker/worker.py @@ -19,7 +19,8 @@ CMD_COMPONENT, CMD_CHANNEL, PROG_NAME from .context import Context from .documents import DocumentFile, DocumentNameGiver -from .exceptions import create_job_exception, JobException +from .exceptions import create_job_exception, JobException, \ + DocumentNotFoundException from .limits import LimitsEnforcer from .templates import TemplateRegistry, Template, Format from .utils import byte_size_format, check_metamodel_version @@ -84,6 +85,21 @@ def safe_format(self) -> Format: raise RuntimeError('Format is not set but it should') return self.format + def check_document_exists(self) -> bool: + LOG.debug('Checking if document "%s" exists in DB', self.doc_uuid) + try: + doc = self.ctx.app.db.fetch_document( + document_uuid=self.doc_uuid, + tenant_uuid=self.tenant_uuid, + ) + exists = doc is not None + LOG.debug('Document "%s" exists: %s', self.doc_uuid, exists) + return exists + except Exception as e: + LOG.error('Failed to check if document "%s" exists: %s', + self.doc_uuid, str(e)) + return False + @handle_job_step('Failed to get document from DB') def get_document(self): SentryReporter.set_tags(phase='fetch') @@ -102,6 +118,7 @@ def get_document(self): raise create_job_exception( job_id=self.doc_uuid, message='Document record not found in database', + document_found=False, ) self.doc.retrieved_at = datetime.datetime.now(tz=datetime.UTC) LOG.info('Job "%s" details received', self.doc_uuid) @@ -312,6 +329,11 @@ def _run(self): self.finalize() def _set_failed(self, message: str): + document_exists = self.check_document_exists() + if not document_exists: + LOG.warning('Document %s does not exist, cannot set to FAILED', + self.doc_uuid) + return if self.try_set_job_state(DocumentState.FAILED, message): LOG.info('Set state to FAILED') else: @@ -323,6 +345,8 @@ def _set_failed(self, message: str): def run(self): try: self._run() + except DocumentNotFoundException as e: + LOG.warning('Document not found: %s', e.log_message()) except JobException as e: LOG.warning('Handled job error: %s', e.log_message()) SentryReporter.capture_exception(e)