# SPDX-FileCopyrightText: Copyright (C) 2025 Opal Health Informatics Group at the Research Institute of the McGill University Health Centre
#
# SPDX-License-Identifier: AGPL-3.0-or-later

"""Command for cleaning up expired IPS bundles."""

import datetime
import re
from collections.abc import Iterable
from typing import Any

from django.conf import settings
from django.core.management.base import BaseCommand

import structlog
from storages.backends.ftp import FTPStorage

LOGGER = structlog.get_logger()

# The number of hours after which IPS bundles will be deleted
# If this value is changed, please also update the instructions in the app (ips-preview-share.html)
# The value of 1 hour was chosen as the easiest way to comply with the SHL specification: https://docs.smarthealthit.org/smart-health-links/spec/#fileslocation-links
IPS_EXPIRY_HOURS = 1


class FTPStoragePlus(FTPStorage):
    """Subclass of FTPStorage that can check a file's last modified datetime."""

    def _datetime_from_time_string(self, time_string: str) -> datetime.datetime:
        """
        Convert an MLSD `modify` value to a timezone-aware datetime.

        Args:
            time_string: timestamp of the form `YYYYMMDDHHMMSS` (for example `20251028155020`), expressed in UTC

        Returns:
            the corresponding timezone-aware datetime, in UTC
        """
        # Rewrite the value as an ISO 8601 string with an explicit UTC designator,
        # for example: 20251028155020 -> 20251028T155020Z
        time_string_iso = f'{time_string[:8]}T{time_string[8:]}Z'

        return datetime.datetime.fromisoformat(time_string_iso)

    # Function modeled on `_get_dir_details` of the FTPStorage class
    def _get_dir_last_modified_details(self) -> dict[str, str]:
        """
        Get the raw last-modified value of each entry in the connection's current directory.

        An open connection is required (see `_start_connection`).

        Returns:
            dictionary mapping each entry name to its MLSD `modify` value (for example `20251028155020`)
        """
        entries = {}

        # `mlsd()` sends the MLSD command and parses each `fact=value;fact=value; name` line;
        # it lower-cases the fact names and keeps names containing ';', '=' or spaces intact
        for filename, facts in self._connection.mlsd():
            modify = facts.get('modify')
            # Entries for which the server reports no `modify` fact cannot be dated, so they are left out
            if modify:
                entries[filename] = modify

        return entries

    def get_modified_times(self, names: Iterable[str]) -> dict[str, datetime.datetime]:
        """
        Return the last modified time of each of the given files, listing the directory only once.

        Args:
            names: names of the files to look up in the connection's current directory

        Returns:
            dictionary mapping each name found on the server to its last modified datetime (in UTC);
            names for which the server returned no information are absent from the result
        """
        self._start_connection()

        entries = self._get_dir_last_modified_details()

        return {name: self._datetime_from_time_string(entries[name]) for name in names if name in entries}

    # Implementation of `get_modified_time` from Django's `Storage` API
    def get_modified_time(self, name: str) -> datetime.datetime:
        """
        Return the last modified time (as a datetime) of the file specified by name.

        Args:
            name: name of the file to look up in the connection's current directory

        Returns:
            The last modified datetime for the given file (timezone-aware, in UTC).

        Raises:
            FileNotFoundError: if information about the specified file cannot be found on the server.
        """
        self._start_connection()

        entries = self._get_dir_last_modified_details()

        if name not in entries:
            raise FileNotFoundError(f'No last modified information found on the server for "{name}"')

        return self._datetime_from_time_string(entries[name])


class Command(BaseCommand):
    """Command for deleting IPS bundles after a certain amount of time has elapsed since their creation."""

    help = 'Delete expired IPS bundles from their storage location.'

    def handle(self, *args: Any, **options: Any) -> None:
        """
        Handle deletion of expired IPS bundles.

        Args:
            args: non-keyword input arguments.
            options: additional keyword input arguments.

        Raises:
            NotImplementedError: if IPS_STORAGE_BACKEND is not set to the FTP storage backend.
        """
        num_deleted = 0
        num_errors = 0

        if settings.IPS_STORAGE_BACKEND != 'storages.backends.ftp.FTPStorage':
            raise NotImplementedError(
                f'The expire_ips_bundles command currently only supports storages.backends.ftp.FTPStorage (see IPS_STORAGE_BACKEND); current value: {settings.IPS_STORAGE_BACKEND}'
            )

        storage_backend = FTPStoragePlus()

        # listdir returns a (directories, files) tuple; only the bundle files (*.ips) are of interest
        _, all_files = storage_backend.listdir('../bundles')
        file_list = [name for name in all_files if re.match(r'^.+\.ips$', name)]

        LOGGER.info(
            f'Checking {len(file_list)} {"file" if len(file_list) == 1 else "files"} to clean up expired IPS bundles (from storage backend: {settings.IPS_STORAGE_BACKEND})',
        )

        # List the directory a single time instead of once per bundle
        modified_times = storage_backend.get_modified_times(file_list)
        expiry = datetime.timedelta(hours=IPS_EXPIRY_HOURS)

        for file_name in file_list:
            last_modified = modified_times.get(file_name)

            # A bundle that cannot be dated must not prevent the other expired bundles from being deleted
            if last_modified is None:
                LOGGER.error(f'Failed to find the last modified time of IPS bundle "{file_name}"')
                num_errors += 1
                continue

            # Calculate the bundle's validity based on the time since it was last modified
            # Note that last modified is used instead of creation time (not available); it offers the same result, since bundle files aren't updated
            now = datetime.datetime.now(datetime.UTC)
            delta = now - last_modified
            valid = delta < expiry

            LOGGER.debug(
                f'{"KEEP" if valid else "DELETE"} - Bundle "{file_name}" last modified {delta} ago ({last_modified} UTC)',
            )

            if valid:
                continue

            try:
                storage_backend.delete(file_name)
            except Exception:
                # Keep going: one failed deletion should not block the clean-up of the remaining bundles
                LOGGER.exception(f'Failed to delete IPS bundle "{file_name}"')
                num_errors += 1
            else:
                num_deleted += 1

        LOGGER.info(
            f'{num_deleted} IPS {"bundle" if num_deleted == 1 else "bundles"} out of {len(file_list)} deleted ({num_errors} {"error" if num_errors == 1 else "errors"})',
        )