From 88ced1e3ad64f4f0e67dbe3cd83811840f14b858 Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Mon, 5 May 2025 16:04:14 -0400 Subject: [PATCH 1/4] download files from EDS instead of PDS --- CHANGELOG.md | 6 + src/py42/clients/securitydata.py | 69 +-- src/py42/sdk/__init__.py | 2 +- src/py42/services/preservationdata.py | 18 +- src/py42/services/storage/exfiltrateddata.py | 39 +- tests/clients/test_securitydata.py | 472 +----------------- .../services/storage/test_exfiltrateddata.py | 20 +- tests/services/test_pds.py | 16 +- 8 files changed, 51 insertions(+), 591 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 832b3a084..141b2c826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 The intended audience of this file is for py42 consumers -- as such, changes that don't affect how a consumer would use the library (e.g. adding unit tests, updating documentation, etc) are not captured here. +## Unreleased + +### Fixed + +- An issue where file download methods did not work in some conditions. + ## 1.28.0 - 2025-03-21 ### Deprecated diff --git a/src/py42/clients/securitydata.py b/src/py42/clients/securitydata.py index 4c98fc03d..5507f7c92 100644 --- a/src/py42/clients/securitydata.py +++ b/src/py42/clients/securitydata.py @@ -1,3 +1,4 @@ +import re from warnings import warn from py42.exceptions import Py42ChecksumNotFoundError @@ -136,76 +137,22 @@ def _stream_file(self, checksum, version_info): raise Py42Error(f"No file with hash {checksum} available for download.") def _get_file_version_for_stream(self, device_guid, md5_hash, sha256_hash, path): - version = self._get_device_file_version( - device_guid, md5_hash, sha256_hash, path - ) - if not version: - version = self._get_other_file_location_version(md5_hash, sha256_hash) - return version - - def _get_device_file_version(self, device_guid, md5_hash, sha256_hash, path): response = self._preservation_data_service.get_file_version_list( device_guid, md5_hash, sha256_hash, path ) - versions = ( - response.data.get("securityEventVersionsMatchingChecksum") - or response.data.get("securityEventVersionsAtPath") - or response.data.get("preservationVersions") - ) - - if versions: - if not response.data.get("securityEventVersionsAtPath"): - exact_match = _get_first_matching_version(versions, md5_hash) - if exact_match: - return exact_match - - most_recent = sorted( - versions, key=lambda i: i["versionTimestamp"], reverse=True - ) - return most_recent[0] - - def _get_other_file_location_version(self, md5_hash, sha256_hash): - response = self._file_event_service.get_file_location_detail_by_sha256( - sha256_hash - ) - locations = response["locations"] - if locations: - paths = _parse_file_location_response(locations) - version = self._preservation_data_service.find_file_version( - md5_hash, sha256_hash, paths - ) - if version.status_code == 200: - return version.data + return response.data.get("match") def _get_file_stream(self, version): - if version.get("edsUrl"): + if version.get("downloadTokenRequest"): return self._get_exfiltrated_file(version) - return self._get_stored_file(version) + raise Py42Error(f"Unable to download file from version {version}") def _get_exfiltrated_file(self, version): - eds = self._storage_service_factory.create_exfiltrated_data_service( - version["edsUrl"] - ) - token = eds.get_download_token( - version["eventId"], - version["deviceUid"], - version["filePath"], - version["fileSHA256"], - version["versionTimestamp"], - ) - return eds.get_file(str(token)) - - def _get_stored_file(self, version): - pds = self._storage_service_factory.create_preservation_data_service( - version["storageNodeURL"] - ) - token = pds.get_download_token( - version["archiveGuid"], - version["fileId"], - version["versionTimestamp"], - ) - return pds.get_file(str(token)) + downloadToken = version.get("downloadTokenRequest") + edsUrl = re.match(r"(https?://[^/]+)((/.*)|$)", downloadToken).group(1) + eds = self._storage_service_factory.create_exfiltrated_data_service(edsUrl) + return eds.get_file(downloadToken) def _parse_file_location_response(locations): diff --git a/src/py42/sdk/__init__.py b/src/py42/sdk/__init__.py index 9f306c347..fee9b596f 100644 --- a/src/py42/sdk/__init__.py +++ b/src/py42/sdk/__init__.py @@ -333,7 +333,7 @@ def _init_services(main_connection, main_auth, auth_flag=None): alert_rules_key = "FedObserver-API_URL" alerts_key = "AlertService-API_URL" file_events_key = "FORENSIC_SEARCH-API_URL" - preservation_data_key = "PRESERVATION-DATA-SERVICE_API-URL" + preservation_data_key = "EXFILTRATED-DATA-SERVICE_API-URL" kv_prefix = "simple-key-value-store" audit_logs_key = "AUDIT-LOG_API-URL" cases_key = "CASES_API-URL" diff --git a/src/py42/services/preservationdata.py b/src/py42/services/preservationdata.py index 6b3e620fb..464588a96 100644 --- a/src/py42/services/preservationdata.py +++ b/src/py42/services/preservationdata.py @@ -4,24 +4,8 @@ class PreservationDataService(BaseService): - def find_file_version(self, file_md5, file_sha256, paths): - """Fetch file version details. - - Args: - file_md5 (str): MD5 encoded hash of the file. - file_sha256 (str): SHA256 encoded hash of the file. - paths (str): File path with filename to fetch. - - Returns: - :class:`py42.response.Py42Response` - """ - - data = {"fileSHA256": file_sha256, "fileMD5": file_md5, "devicePaths": paths} - uri = "/api/v1/FindAvailableVersion" - return self._connection.post(uri, json=data) - def get_file_version_list(self, device_id, file_md5, file_sha256, path): params = "fileSHA256={}&fileMD5={}&deviceUid={}&filePath={}" params = params.format(file_sha256, file_md5, device_id, quote(path)) - uri = f"/api/v2/file-version-listing?{params}" + uri = f"/api/v3/search-file?{params}" return self._connection.get(uri) diff --git a/src/py42/services/storage/exfiltrateddata.py b/src/py42/services/storage/exfiltrateddata.py index 31b8f8324..f6f488fe0 100644 --- a/src/py42/services/storage/exfiltrateddata.py +++ b/src/py42/services/storage/exfiltrateddata.py @@ -1,5 +1,3 @@ -from urllib.parse import quote - from py42.services import BaseService @@ -7,34 +5,11 @@ class ExfiltratedDataService(BaseService): _base_uri = "api/v1/" - def __init__(self, main_session, streaming_session): - super().__init__(main_session) + def __init__(self, main_connection, streaming_session): + super().__init__(main_connection) self._streaming_session = streaming_session - def get_download_token( - self, event_id, device_id, file_path, file_sha256, timestamp - ): - """Get EDS download token for a file. - - Args: - event_id (str): Id of the file event that references the file desired for download. - device_id (str): Id of the device on which the file desired for download is stored. - file_path (str): Path where the file desired for download resides on the device. - timestamp (int): Last updated timestamp of the file in milliseconds. - - Returns: - :class:`py42.response.Py42Response`: A response containing download token for the file. - """ - params = "deviceUid={}&eventId={}&filePath={}&fileSHA256={}&versionTimestamp={}" - params = params.format( - device_id, event_id, quote(file_path), file_sha256, timestamp - ) - resource = "file-download-token" - headers = {"Accept": "*/*"} - uri = f"{self._base_uri}{resource}?{params}" - return self._connection.get(uri, headers=headers) - - def get_file(self, token): + def get_file(self, downloadUrl): """Streams a file. Args: @@ -43,10 +18,6 @@ def get_file(self, token): Returns: Returns a stream of the file indicated by the input token. """ - resource = "get-file" - uri = f"{self._connection.host_address}/{self._base_uri}{resource}" - params = {"token": token} + uri = f"{downloadUrl}" headers = {"Accept": "*/*"} - return self._streaming_session.get( - uri, params=params, headers=headers, stream=True - ) + return self._streaming_session.get(uri, headers=headers, stream=True) diff --git a/tests/clients/test_securitydata.py b/tests/clients/test_securitydata.py index 6dc503f93..d85bffd98 100644 --- a/tests/clients/test_securitydata.py +++ b/tests/clients/test_securitydata.py @@ -12,7 +12,6 @@ from py42.services.savedsearch import SavedSearchService from py42.services.storage._service_factory import StorageServiceFactory from py42.services.storage.exfiltrateddata import ExfiltratedDataService -from py42.services.storage.preservationdata import StoragePreservationDataService FILE_EVENT_URI = "/forensic-search/queryservice/api/v1/fileevent" RAW_QUERY = "RAW JSON QUERY" @@ -44,114 +43,20 @@ ] }""" -PDS_FILE_VERSIONS = """{ - "preservationVersions": [ - { - "storageNodeURL": "https://host-1.example.com", - "archiveGuid": "archiveid-1", - "fileId": "fileid-1", - "fileMD5": "testmd5-1", - "fileSHA256": "testsha256-1", - "versionTimestamp": 12345 - }, - { - "storageNodeURL": "https://host-2.example.com", - "archiveGuid": "archiveid-2", - "fileId": "fileid-2", - "fileMD5": "testmd5-2", - "fileSHA256": "testsha256-2", - "versionTimestamp": 12344 - }, - { - "storageNodeURL": "https://host-3.example.com", - "archiveGuid": "archiveid-3", - "fileId": "fileid-3", - "fileMD5": "testmd5-3", - "fileSHA256": "testsha256-3", - "versionTimestamp": 12346 - } - ], - "securityEventVersionsMatchingChecksum": [], - "securityEventVersionsAtPath": [] -}""" -XFC_EXACT_FILE_VERSIONS = """{ - "preservationVersions": [], - "securityEventVersionsMatchingChecksum": [ - { - "edsUrl": "https://host-1.example.com", - "deviceUid": "deviceuid-1", - "eventId": "eventid-1", - "fileMD5": "testmd5-1", - "fileSHA256": "testsha256-1", - "filePath": "/test/file/path-1/", - "versionTimestamp": 12345 - }, - { - "edsUrl": "https://host-2.example.com", - "deviceUid": "deviceuid-2", - "eventId": "eventid-2", - "fileMD5": "testmd5-2", - "fileSHA256": "testsha256-2", - "filePath": "/test/file/path-2/", - "versionTimestamp": 12344 - }, - { - "edsUrl": "https://host-3.example.com", - "deviceUid": "deviceuid-3", - "eventId": "eventid-3", - "fileMD5": "testmd5-3", - "fileSHA256": "testsha256-3", - "filePath": "/test/file/path-3/", - "versionTimestamp": 12346 - } - ], - "securityEventVersionsAtPath": [] +XFC_EXACT_FILE_VERSION_RESPONSE = """{ + "match": { + "downloadTokenRequest": "https://test-url/test-path?token=test-token" + } }""" -XFC_MATCHED_FILE_VERSIONS = """{ - "preservationVersions": [], - "securityEventVersionsMatchingChecksum": [], - "securityEventVersionsAtPath": [ - { - "edsUrl": "https://host-1.example.com", - "deviceUid": "deviceuid-1", - "eventId": "eventid-1", - "fileMD5": "testmd5-1", - "fileSHA256": "testsha256-1", - "filePath": "/test/file/path-1/", - "versionTimestamp": 12345 - }, - { - "edsUrl": "https://host-2.example.com", - "deviceUid": "deviceuid-2", - "eventId": "eventid-2", - "fileMD5": "testmd5-2", - "fileSHA256": "testsha256-2", - "filePath": "/test/file/path-2/", - "versionTimestamp": 12344 - }, - { - "edsUrl": "https://host-3.example.com", - "deviceUid": "deviceuid-3", - "eventId": "eventid-3", - "fileMD5": "testmd5-3", - "fileSHA256": "testsha256-3", - "filePath": "/test/file/path-3/", - "versionTimestamp": 12346 - } +XFC_NOT_FOUND_RESPONSE = """{ + "match": null, + "failureReports": [ + "failure-one" ] }""" -AVAILABLE_VERSION_RESPONSE = """{ - "storageNodeURL": "https://host.com", - "archiveGuid": "archiveid-3", - "fileId": "fileid-3", - "fileMD5": "testmd5-3", - "fileSHA256": "testsha256-3", - "versionTimestamp": 12346 -}""" - class TestSecurityClient: @pytest.fixture @@ -221,7 +126,11 @@ def file_location(self, mocker): @pytest.fixture def file_version_list(self, mocker): - return create_mock_response(mocker, PDS_FILE_VERSIONS) + return create_mock_response(mocker, XFC_EXACT_FILE_VERSION_RESPONSE) + + @pytest.fixture + def empty_file_versions(self, mocker): + return create_mock_response(mocker, XFC_NOT_FOUND_RESPONSE) @pytest.fixture def pds_config( @@ -233,24 +142,16 @@ def pds_config( saved_search_service, ): mock = mocker.MagicMock() - file_download = create_mock_response(mocker, "PDSDownloadToken=token") file_event_service.search.return_value = create_mock_response( mocker, FILE_EVENTS_RESPONSE_V2 ) preservation_data_service.get_file_version_list.return_value = ( - create_mock_response(mocker, PDS_FILE_VERSIONS) + create_mock_response(mocker, XFC_EXACT_FILE_VERSION_RESPONSE) ) file_event_service.get_file_location_detail_by_sha256.return_value = ( create_mock_response(mocker, FILE_LOCATION_RESPONSE) ) - storage_node_client = mocker.MagicMock(spec=StoragePreservationDataService) - storage_node_client.get_download_token.return_value = file_download - storage_node_client.get_file.return_value = b"stream" - storage_service_factory.create_preservation_data_service.return_value = ( - storage_node_client - ) exfiltration_client = mocker.MagicMock(spec=ExfiltratedDataService) - exfiltration_client.get_download_token.return_value = file_download exfiltration_client.get_file.return_value = b"stream" storage_service_factory.create_exfiltrated_data_service.return_value = ( exfiltration_client @@ -260,7 +161,6 @@ def pds_config( mock.file_event_service = file_event_service mock.preservation_data_service = preservation_data_service mock.saved_search_service = saved_search_service - mock.storage_node_client = storage_node_client mock.exfiltration_client = exfiltration_client return mock @@ -274,24 +174,16 @@ def pds_config_v2( saved_search_service, ): mock = mocker.MagicMock() - file_download = create_mock_response(mocker, "PDSDownloadToken=token") file_event_service.search.return_value = create_mock_response( mocker, FILE_EVENTS_RESPONSE_V2 ) preservation_data_service.get_file_version_list.return_value = ( - create_mock_response(mocker, PDS_FILE_VERSIONS) + create_mock_response(mocker, XFC_EXACT_FILE_VERSION_RESPONSE) ) file_event_service.get_file_location_detail_by_sha256.return_value = ( create_mock_response(mocker, FILE_LOCATION_RESPONSE) ) - storage_node_client = mocker.MagicMock(spec=StoragePreservationDataService) - storage_node_client.get_download_token.return_value = file_download - storage_node_client.get_file.return_value = b"stream" - storage_service_factory.create_preservation_data_service.return_value = ( - storage_node_client - ) exfiltration_client = mocker.MagicMock(spec=ExfiltratedDataService) - exfiltration_client.get_download_token.return_value = file_download exfiltration_client.get_file.return_value = b"stream" storage_service_factory.create_exfiltrated_data_service.return_value = ( exfiltration_client @@ -301,7 +193,6 @@ def pds_config_v2( mock.file_event_service = file_event_service mock.preservation_data_service = preservation_data_service mock.saved_search_service = saved_search_service - mock.storage_node_client = storage_node_client mock.exfiltration_client = exfiltration_client return mock @@ -327,58 +218,13 @@ def test_stream_file_by_sha256_with_exact_match_response_calls_get_version_list_ pds_config.preservation_data_service.get_file_version_list.assert_called_once_with( *version_list_params ) - pds_config.storage_service_factory.create_preservation_data_service.assert_called_once_with( - "https://host-2.example.com" + pds_config.storage_service_factory.create_exfiltrated_data_service.assert_called_once_with( + "https://test-url" ) assert ( pds_config.file_event_service.get_file_location_detail_by_sha256.call_count == 0 ) - assert pds_config.preservation_data_service.find_file_version.call_count == 0 - expected_download_token_params = ["archiveid-2", "fileid-2", 12344] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) - assert response == b"stream" - - def test_stream_file_by_sha256_without_exact_match_response_calls_get_version_list_with_expected_params( - self, - mocker, - pds_config, - ): - pds_config.file_event_service.search.return_value = create_mock_response( - mocker, FILE_EVENTS_RESPONSE_V2.replace("-2", "-6") - ) - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - - response = security_client.stream_file_by_sha256("testsha256-6") - expected = [ - "testdeviceUid", - "testmd5-6", - "testsha256-6", - "/test/file/path/testfileName", - ] - pds_config.preservation_data_service.get_file_version_list.assert_called_once_with( - *expected - ) - pds_config.storage_service_factory.create_preservation_data_service.assert_called_once_with( - "https://host-3.example.com" - ) - assert ( - pds_config.file_event_service.get_file_location_detail_by_sha256.call_count - == 0 - ) - assert pds_config.preservation_data_service.find_file_version.call_count == 0 - # should get version with most recent versionTimestamp - expected_download_token_params = ["archiveid-3", "fileid-3", 12346] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) assert response == b"stream" def test_stream_file_by_sha256_when_search_returns_empty_response_raises_py42_checksum_not_found_error_( @@ -399,50 +245,12 @@ def test_stream_file_by_sha256_when_search_returns_empty_response_raises_py42_ch assert "No files found with SHA256 checksum" in e.value.args[0] - def test_stream_file_by_sha256_when_file_versions_returns_empty_response_gets_version_from_other_location( - self, - mocker, - pds_config, - ): - available_version = create_mock_response(mocker, AVAILABLE_VERSION_RESPONSE) - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - pds_config.preservation_data_service.find_file_version.return_value = ( - available_version - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - response = security_client.stream_file_by_sha256("shahash") - assert response == b"stream" - pds_config.file_event_service.get_file_location_detail_by_sha256.assert_called_once_with( - "testsha256-2" - ) - expected = ["testmd5-2", "testsha256-2", mocker.ANY] - pds_config.preservation_data_service.find_file_version.assert_called_once_with( - *expected - ) - # should return version returned by find_file_version - expected_expected_download_token_params = ["archiveid-3", "fileid-3", 12346] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_expected_download_token_params - ) - def test_stream_file_by_sha256_when_get_locations_returns_empty_list_raises_py42_error( - self, - mocker, - pds_config, + self, mocker, pds_config, empty_file_versions ): - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') file_location = create_mock_response(mocker, '{"locations": []}') pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list + empty_file_versions ) pds_config.file_event_service.get_file_location_detail_by_sha256.return_value = ( file_location @@ -459,34 +267,6 @@ def test_stream_file_by_sha256_when_get_locations_returns_empty_list_raises_py42 assert e.value.args[0] == PDS_EXCEPTION_MESSAGE.format("shahash") - def test_stream_file_by_sha256_when_find_file_version_returns_204_status_code_raises_py42_error( - self, - mocker, - pds_config, - ): - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - available_version = create_mock_response( - mocker, AVAILABLE_VERSION_RESPONSE, 204 - ) - pds_config.preservation_data_service.find_file_version.return_value = ( - available_version - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - - with pytest.raises(Py42Error) as e: - security_client.stream_file_by_sha256("shahash") - - assert e.value.args[0] == PDS_EXCEPTION_MESSAGE.format("shahash") - def test_stream_file_by_md5_with_exact_match_response_calls_get_version_list_with_expected_params( self, pds_config, @@ -508,63 +288,19 @@ def test_stream_file_by_md5_with_exact_match_response_calls_get_version_list_wit pds_config.preservation_data_service.get_file_version_list.assert_called_once_with( *version_list_params ) - pds_config.storage_service_factory.create_preservation_data_service.assert_called_once_with( - "https://host-2.example.com" + pds_config.storage_service_factory.create_exfiltrated_data_service.assert_called_once_with( + "https://test-url" ) assert ( pds_config.file_event_service.get_file_location_detail_by_sha256.call_count == 0 ) - assert pds_config.preservation_data_service.find_file_version.call_count == 0 - expected_download_token_params = ["archiveid-2", "fileid-2", 12344] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) assert response == b"stream" - def test_stream_file_by_md5_without_exact_match_response_calls_get_version_list_with_expected_params( + def test_stream_file_by_md5_when_search_returns_empty_response_raises_py42_checksum_not_found_error_( self, mocker, pds_config, - ): - pds_config.file_event_service.search.return_value = create_mock_response( - mocker, FILE_EVENTS_RESPONSE_V2.replace("-2", "-6") - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - - response = security_client.stream_file_by_md5("testmd5-6") - expected = [ - "testdeviceUid", - "testmd5-6", - "testsha256-6", - "/test/file/path/testfileName", - ] - pds_config.preservation_data_service.get_file_version_list.assert_called_once_with( - *expected - ) - pds_config.storage_service_factory.create_preservation_data_service.assert_called_once_with( - "https://host-3.example.com" - ) - assert ( - pds_config.file_event_service.get_file_location_detail_by_sha256.call_count - == 0 - ) - assert pds_config.preservation_data_service.find_file_version.call_count == 0 - # should get version returned with most recent versionTimestamp - expected_download_token_params = ["archiveid-3", "fileid-3", 12346] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) - assert response == b"stream" - - def test_stream_file_by_md5_when_search_returns_empty_response_raises_py42_checksum_not_found_error_( - self, mocker, pds_config ): pds_config.file_event_service.search.return_value = create_mock_response( mocker, '{"fileEvents": []}' @@ -581,94 +317,12 @@ def test_stream_file_by_md5_when_search_returns_empty_response_raises_py42_check assert "No files found with MD5 checksum" in e.value.args[0] - def test_stream_file_by_md5_when_file_versions_returns_empty_response_gets_version_from_other_location( - self, - mocker, - pds_config, - ): - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - available_version = create_mock_response(mocker, AVAILABLE_VERSION_RESPONSE) - pds_config.preservation_data_service.find_file_version.return_value = ( - available_version - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - response = security_client.stream_file_by_md5("mdhash") - assert response == b"stream" - pds_config.file_event_service.get_file_location_detail_by_sha256.assert_called_once_with( - "testsha256-2" - ) - expected = ["testmd5-2", "testsha256-2", mocker.ANY] - pds_config.preservation_data_service.find_file_version.assert_called_once_with( - *expected - ) - # should return version returned by find_file_version - expected_download_token_params = ["archiveid-3", "fileid-3", 12346] - pds_config.storage_node_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) - - def test_stream_file_by_md5_when_get_locations_returns_empty_list_raises_py42_error( - self, mocker, pds_config - ): - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') - file_location = create_mock_response(mocker, '{"locations": []}') - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - pds_config.file_event_service.get_file_location_detail_by_sha256.return_value = ( - file_location - ) - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - - with pytest.raises(Py42Error) as e: - security_client.stream_file_by_md5("mdhash") - - assert e.value.args[0] == PDS_EXCEPTION_MESSAGE.format("mdhash") - - def test_stream_file_by_md5_when_find_file_version_returns_204_status_code_raises_py42_error( - self, mocker, pds_config - ): - file_version_list = create_mock_response(mocker, '{"preservationVersions": []}') - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - available_version = create_mock_response( - mocker, AVAILABLE_VERSION_RESPONSE, 204 - ) - pds_config.preservation_data_service.find_file_version.return_value = ( - available_version - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - - with pytest.raises(Py42Error) as e: - security_client.stream_file_by_md5("mdhash") - - assert e.value.args[0] == PDS_EXCEPTION_MESSAGE.format("mdhash") - def test_stream_file_by_md5_when_has_exact_match_calls_get_token_with_expected_params_and_streams_successfully( self, mocker, pds_config ): - file_version_list = create_mock_response(mocker, XFC_EXACT_FILE_VERSIONS) + file_version_list = create_mock_response( + mocker, XFC_EXACT_FILE_VERSION_RESPONSE + ) pds_config.preservation_data_service.get_file_version_list.return_value = ( file_version_list ) @@ -681,75 +335,13 @@ def test_stream_file_by_md5_when_has_exact_match_calls_get_token_with_expected_p ) response = security_client.stream_file_by_md5("testmd5-2") assert response == b"stream" - expected_download_token_params = [ - "eventid-2", - "deviceuid-2", - "/test/file/path-2/", - "testsha256-2", - 12344, - ] - pds_config.exfiltration_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) def test_stream_file_by_sha256_when_has_exact_match_calls_get_token_with_expected_params_and_streams_successfully( self, mocker, pds_config ): - file_version_list = create_mock_response(mocker, XFC_EXACT_FILE_VERSIONS) - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - response = security_client.stream_file_by_sha256("testsha256-2") - assert response == b"stream" - expected_download_token_params = [ - "eventid-2", - "deviceuid-2", - "/test/file/path-2/", - "testsha256-2", - 12344, - ] - pds_config.exfiltration_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) - - def test_stream_file_by_md5_when_has_path_match_calls_get_token_with_expected_params_and_streams_successfully( - self, mocker, pds_config - ): - file_version_list = create_mock_response(mocker, XFC_MATCHED_FILE_VERSIONS) - pds_config.preservation_data_service.get_file_version_list.return_value = ( - file_version_list - ) - - security_client = SecurityDataClient( - pds_config.file_event_service, - pds_config.preservation_data_service, - pds_config.saved_search_service, - pds_config.storage_service_factory, - ) - response = security_client.stream_file_by_md5("testmd5-2") - assert response == b"stream" - expected_download_token_params = [ - "eventid-3", - "deviceuid-3", - "/test/file/path-3/", - "testsha256-3", - 12346, - ] - pds_config.exfiltration_client.get_download_token.assert_called_once_with( - *expected_download_token_params + file_version_list = create_mock_response( + mocker, XFC_EXACT_FILE_VERSION_RESPONSE ) - - def test_stream_file_by_sha256_when_has_path_match_calls_get_token_with_expected_params_and_streams_successfully( - self, mocker, pds_config - ): - file_version_list = create_mock_response(mocker, XFC_MATCHED_FILE_VERSIONS) pds_config.preservation_data_service.get_file_version_list.return_value = ( file_version_list ) @@ -762,16 +354,6 @@ def test_stream_file_by_sha256_when_has_path_match_calls_get_token_with_expected ) response = security_client.stream_file_by_sha256("testsha256-2") assert response == b"stream" - expected_download_token_params = [ - "eventid-3", - "deviceuid-3", - "/test/file/path-3/", - "testsha256-3", - 12346, - ] - pds_config.exfiltration_client.get_download_token.assert_called_once_with( - *expected_download_token_params - ) def test_search_all_file_events_calls_search_with_expected_params_when_pg_token_is_not_passed( self, diff --git a/tests/services/storage/test_exfiltrateddata.py b/tests/services/storage/test_exfiltrateddata.py index 1f58c268f..f45f3ae6e 100644 --- a/tests/services/storage/test_exfiltrateddata.py +++ b/tests/services/storage/test_exfiltrateddata.py @@ -11,21 +11,6 @@ def mock_request(self, mocker): request.return_value = b"stream" return request - def test_get_download_token_calls_get_with_valid_params( - self, mock_successful_connection - ): - service = ExfiltratedDataService( - mock_successful_connection, mock_successful_connection - ) - service.get_download_token( - "testeventid", "testdeviceid", "testfilepath", "testSHA256", 1223 - ) - qry = "deviceUid=testdeviceid&eventId=testeventid&filePath=testfilepath&fileSHA256=testSHA256&versionTimestamp=1223" - expected = f"api/v1/file-download-token?{qry}" - mock_successful_connection.get.assert_called_once_with( - expected, headers={"Accept": "*/*"} - ) - def test_get_file_calls_get_with_valid_params( self, mock_successful_connection, mock_request ): @@ -33,10 +18,9 @@ def test_get_file_calls_get_with_valid_params( service = ExfiltratedDataService( mock_successful_connection, mock_successful_connection ) - service.get_file("testtoken") + service.get_file("https://example.com/testpath?token=testtoken") mock_successful_connection.get.assert_called_once_with( - "https://example.com/api/v1/get-file", + "https://example.com/testpath?token=testtoken", headers={"Accept": "*/*"}, - params={"token": "testtoken"}, stream=True, ) diff --git a/tests/services/test_pds.py b/tests/services/test_pds.py index fd5824818..eb3dcad46 100644 --- a/tests/services/test_pds.py +++ b/tests/services/test_pds.py @@ -9,25 +9,11 @@ def mock_connection(self, mock_connection, successful_response): mock_connection.post.return_value = successful_response return mock_connection - def test_find_file_version_posts_expected_data(self, mock_connection): - pds = PreservationDataService(mock_connection) - pds.find_file_version("abc", "adfadf", ["/path/path", "/path/path2"]) - - assert mock_connection.post.call_count == 1 - posted_data = mock_connection.post.call_args[1]["json"] - assert mock_connection.post.call_args[0][0] == "/api/v1/FindAvailableVersion" - assert ( - posted_data["fileSHA256"] == "adfadf" - and posted_data["fileMD5"] == "abc" - and posted_data["devicePaths"][0] == "/path/path" - and posted_data["devicePaths"][1] == "/path/path2" - ) - def test_get_file_version_list_uses_expected_url(self, mock_connection): pds = PreservationDataService(mock_connection) pds.get_file_version_list("testguid", "testmd5", "testsha256", "/t/1 X") qry = ( "fileSHA256=testsha256&fileMD5=testmd5&deviceUid=testguid&filePath=/t/1%20X" ) - expected = f"/api/v2/file-version-listing?{qry}" + expected = f"/api/v3/search-file?{qry}" mock_connection.get.assert_called_once_with(expected) From 613c4def300568885f2cafd32d6f5e96fb92b5f6 Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Mon, 5 May 2025 16:51:11 -0400 Subject: [PATCH 2/4] remove integration test --- tests/integration/test_securitydata.py | 50 -------------------------- 1 file changed, 50 deletions(-) delete mode 100644 tests/integration/test_securitydata.py diff --git a/tests/integration/test_securitydata.py b/tests/integration/test_securitydata.py deleted file mode 100644 index cad4cc54c..000000000 --- a/tests/integration/test_securitydata.py +++ /dev/null @@ -1,50 +0,0 @@ -from datetime import datetime -from datetime import timedelta - -import pytest -from tests.integration.conftest import assert_successful_response - -from py42.sdk.queries.fileevents.file_event_query import FileEventQuery -from py42.sdk.queries.fileevents.filters import EventTimestamp -from py42.util import convert_datetime_to_epoch - - -@pytest.fixture(scope="module") -def md5_hash(request): - return request.config.getini("md5_hash") - - -@pytest.fixture(scope="module") -def sha256_hash(request): - return request.config.getini("sha256_hash") - - -@pytest.fixture(scope="module") -def user_uid(request): - return request.config.getini("user_uid") - - -@pytest.fixture -def file_data(request): - return request.config.getini("file_data") - - -@pytest.mark.integration -class TestSecurityData: - def test_search_file_events(self, connection): - start_date = datetime.utcnow() - timedelta(1) - end_date = datetime.utcnow() - start_timestamp = convert_datetime_to_epoch(start_date) - end_timestamp = convert_datetime_to_epoch(end_date) - date_query = EventTimestamp.in_range(start_timestamp, end_timestamp) - query = FileEventQuery.all(date_query) - response = connection.securitydata.search_file_events(query) - assert_successful_response(response) - - def test_stream_file_by_md5(self, connection, md5_hash, file_data): - response = connection.securitydata.stream_file_by_md5(md5_hash) - assert str(response) == file_data - - def test_stream_file_by_sha256(self, connection, sha256_hash, file_data): - response = connection.securitydata.stream_file_by_sha256(sha256_hash) - assert str(response) == file_data From 0fa4dd72fd356f155b00c1e33e9828876a9d59c1 Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Tue, 6 May 2025 12:19:44 -0400 Subject: [PATCH 3/4] fix file download flow --- src/py42/clients/securitydata.py | 7 ++++--- src/py42/services/storage/exfiltrateddata.py | 17 +++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/py42/clients/securitydata.py b/src/py42/clients/securitydata.py index 5507f7c92..8ed9ba5b3 100644 --- a/src/py42/clients/securitydata.py +++ b/src/py42/clients/securitydata.py @@ -149,10 +149,11 @@ def _get_file_stream(self, version): raise Py42Error(f"Unable to download file from version {version}") def _get_exfiltrated_file(self, version): - downloadToken = version.get("downloadTokenRequest") - edsUrl = re.match(r"(https?://[^/]+)((/.*)|$)", downloadToken).group(1) + downloadTokenRequest = version.get("downloadTokenRequest") + edsUrl = re.match(r"(https?://[^/]+)((/.*)|$)", downloadTokenRequest).group(1) eds = self._storage_service_factory.create_exfiltrated_data_service(edsUrl) - return eds.get_file(downloadToken) + token_response = eds.get_download_token(downloadTokenRequest) + return eds.get_file(token_response.text) def _parse_file_location_response(locations): diff --git a/src/py42/services/storage/exfiltrateddata.py b/src/py42/services/storage/exfiltrateddata.py index f6f488fe0..a84947b3f 100644 --- a/src/py42/services/storage/exfiltrateddata.py +++ b/src/py42/services/storage/exfiltrateddata.py @@ -9,7 +9,20 @@ def __init__(self, main_connection, streaming_session): super().__init__(main_connection) self._streaming_session = streaming_session - def get_file(self, downloadUrl): + def get_download_token(self, downloadRequestUrl): + """Get EDS download token for a file. + + Args: + downloadRequestUrl (str): The download request url to get the token + + Returns: + :class:`py42.response.Py42Response`: A response containing download token for the file. + """ + headers = {"Accept": "*/*"} + uri = f"{downloadRequestUrl}" + return self._connection.get(uri, headers=headers) + + def get_file(self, token): """Streams a file. Args: @@ -18,6 +31,6 @@ def get_file(self, downloadUrl): Returns: Returns a stream of the file indicated by the input token. """ - uri = f"{downloadUrl}" + uri = f"{token}" headers = {"Accept": "*/*"} return self._streaming_session.get(uri, headers=headers, stream=True) From 536f96a0e4907abfc3c86a87ef5481899de88222 Mon Sep 17 00:00:00 2001 From: Cecilia Stevens <63068179+ceciliastevens@users.noreply.github.com> Date: Tue, 6 May 2025 12:35:00 -0400 Subject: [PATCH 4/4] version bump --- CHANGELOG.md | 6 +++++- src/py42/__version__.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 141b2c826..bb79f630f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 The intended audience of this file is for py42 consumers -- as such, changes that don't affect how a consumer would use the library (e.g. adding unit tests, updating documentation, etc) are not captured here. -## Unreleased +## 1.29.0 - 2025-05-06 ### Fixed - An issue where file download methods did not work in some conditions. +### Changed + +- The internal method `ExfiltratedDataService.get_download_token` now takes only one parameter, downloadRequestUrl. This is the full URL (with parameters) that will be used to request the download token. + ## 1.28.0 - 2025-03-21 ### Deprecated diff --git a/src/py42/__version__.py b/src/py42/__version__.py index a637d1655..fc7f0ef67 100644 --- a/src/py42/__version__.py +++ b/src/py42/__version__.py @@ -1,3 +1,3 @@ # py42 -__version__ = "1.28.2" +__version__ = "1.29.0"