Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
The intended audience of this file is py42 consumers -- as such, changes that don't affect
how a consumer would use the library (e.g. adding unit tests, updating documentation, etc.) are not captured here.

## 1.29.0 - 2025-05-06

### Fixed

- An issue where file download methods did not work under some conditions.

### Changed

- The internal method `ExfiltratedDataService.get_download_token` now takes only one parameter, `downloadRequestUrl`. This is the full URL (with parameters) that will be used to request the download token.

## 1.28.0 - 2025-03-21

### Deprecated
Expand Down
2 changes: 1 addition & 1 deletion src/py42/__version__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# py42

__version__ = "1.28.2"
__version__ = "1.29.0"
70 changes: 9 additions & 61 deletions src/py42/clients/securitydata.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from warnings import warn

from py42.exceptions import Py42ChecksumNotFoundError
Expand Down Expand Up @@ -136,76 +137,23 @@ def _stream_file(self, checksum, version_info):
raise Py42Error(f"No file with hash {checksum} available for download.")

def _get_file_version_for_stream(self, device_guid, md5_hash, sha256_hash, path):
version = self._get_device_file_version(
device_guid, md5_hash, sha256_hash, path
)
if not version:
version = self._get_other_file_location_version(md5_hash, sha256_hash)
return version

def _get_device_file_version(self, device_guid, md5_hash, sha256_hash, path):
response = self._preservation_data_service.get_file_version_list(
device_guid, md5_hash, sha256_hash, path
)
versions = (
response.data.get("securityEventVersionsMatchingChecksum")
or response.data.get("securityEventVersionsAtPath")
or response.data.get("preservationVersions")
)

if versions:
if not response.data.get("securityEventVersionsAtPath"):
exact_match = _get_first_matching_version(versions, md5_hash)
if exact_match:
return exact_match

most_recent = sorted(
versions, key=lambda i: i["versionTimestamp"], reverse=True
)
return most_recent[0]

def _get_other_file_location_version(self, md5_hash, sha256_hash):
response = self._file_event_service.get_file_location_detail_by_sha256(
sha256_hash
)
locations = response["locations"]
if locations:
paths = _parse_file_location_response(locations)
version = self._preservation_data_service.find_file_version(
md5_hash, sha256_hash, paths
)
if version.status_code == 200:
return version.data
return response.data.get("match")

def _get_file_stream(self, version):
if version.get("edsUrl"):
if version.get("downloadTokenRequest"):
return self._get_exfiltrated_file(version)

return self._get_stored_file(version)
raise Py42Error(f"Unable to download file from version {version}")

def _get_exfiltrated_file(self, version):
eds = self._storage_service_factory.create_exfiltrated_data_service(
version["edsUrl"]
)
token = eds.get_download_token(
version["eventId"],
version["deviceUid"],
version["filePath"],
version["fileSHA256"],
version["versionTimestamp"],
)
return eds.get_file(str(token))

def _get_stored_file(self, version):
pds = self._storage_service_factory.create_preservation_data_service(
version["storageNodeURL"]
)
token = pds.get_download_token(
version["archiveGuid"],
version["fileId"],
version["versionTimestamp"],
)
return pds.get_file(str(token))
downloadTokenRequest = version.get("downloadTokenRequest")
edsUrl = re.match(r"(https?://[^/]+)((/.*)|$)", downloadTokenRequest).group(1)
eds = self._storage_service_factory.create_exfiltrated_data_service(edsUrl)
token_response = eds.get_download_token(downloadTokenRequest)
return eds.get_file(token_response.text)


def _parse_file_location_response(locations):
Expand Down
2 changes: 1 addition & 1 deletion src/py42/sdk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ def _init_services(main_connection, main_auth, auth_flag=None):
alert_rules_key = "FedObserver-API_URL"
alerts_key = "AlertService-API_URL"
file_events_key = "FORENSIC_SEARCH-API_URL"
preservation_data_key = "PRESERVATION-DATA-SERVICE_API-URL"
preservation_data_key = "EXFILTRATED-DATA-SERVICE_API-URL"
kv_prefix = "simple-key-value-store"
audit_logs_key = "AUDIT-LOG_API-URL"
cases_key = "CASES_API-URL"
Expand Down
18 changes: 1 addition & 17 deletions src/py42/services/preservationdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,8 @@


class PreservationDataService(BaseService):
def find_file_version(self, file_md5, file_sha256, paths):
"""Fetch file version details.

Args:
file_md5 (str): MD5 encoded hash of the file.
file_sha256 (str): SHA256 encoded hash of the file.
paths (str): File path with filename to fetch.

Returns:
:class:`py42.response.Py42Response`
"""

data = {"fileSHA256": file_sha256, "fileMD5": file_md5, "devicePaths": paths}
uri = "/api/v1/FindAvailableVersion"
return self._connection.post(uri, json=data)

def get_file_version_list(self, device_id, file_md5, file_sha256, path):
params = "fileSHA256={}&fileMD5={}&deviceUid={}&filePath={}"
params = params.format(file_sha256, file_md5, device_id, quote(path))
uri = f"/api/v2/file-version-listing?{params}"
uri = f"/api/v3/search-file?{params}"
return self._connection.get(uri)
30 changes: 7 additions & 23 deletions src/py42/services/storage/exfiltrateddata.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,25 @@
from urllib.parse import quote

from py42.services import BaseService


class ExfiltratedDataService(BaseService):

_base_uri = "api/v1/"

def __init__(self, main_session, streaming_session):
super().__init__(main_session)
def __init__(self, main_connection, streaming_session):
super().__init__(main_connection)
self._streaming_session = streaming_session

def get_download_token(
self, event_id, device_id, file_path, file_sha256, timestamp
):
def get_download_token(self, downloadRequestUrl):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the only customer using this as far as we know? Changing the signature is technically a breaking change - but if it's currently broken and/or no one's using it, then it's probably okay.
Maybe we could add details about how to adopt the fix in the changelog?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method is only used internally and isn't included in the documentation. I suppose it's possible that someone could have been using it directly, but I doubt it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a note to the changelog.

"""Get EDS download token for a file.

Args:
event_id (str): Id of the file event that references the file desired for download.
device_id (str): Id of the device on which the file desired for download is stored.
file_path (str): Path where the file desired for download resides on the device.
timestamp (int): Last updated timestamp of the file in milliseconds.
downloadRequestUrl (str): The download request url to get the token

Returns:
:class:`py42.response.Py42Response`: A response containing download token for the file.
"""
params = "deviceUid={}&eventId={}&filePath={}&fileSHA256={}&versionTimestamp={}"
params = params.format(
device_id, event_id, quote(file_path), file_sha256, timestamp
)
resource = "file-download-token"
headers = {"Accept": "*/*"}
uri = f"{self._base_uri}{resource}?{params}"
uri = f"{downloadRequestUrl}"
return self._connection.get(uri, headers=headers)

def get_file(self, token):
Expand All @@ -43,10 +31,6 @@ def get_file(self, token):
Returns:
Returns a stream of the file indicated by the input token.
"""
resource = "get-file"
uri = f"{self._connection.host_address}/{self._base_uri}{resource}"
params = {"token": token}
uri = f"{token}"
headers = {"Accept": "*/*"}
return self._streaming_session.get(
uri, params=params, headers=headers, stream=True
)
return self._streaming_session.get(uri, headers=headers, stream=True)
Loading
Loading