Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions ml_datasets/test/test_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
from urllib.error import HTTPError, URLError
from ml_datasets.util import get_file


def test_get_file_domain_resolution_fails():
    """Fetching from an unresolvable host must raise URLError.

    The raised error's message has to mention the requested file name,
    followed by one of the two resolver error wordings accepted below.
    """
    expected_message = r"test_non_existent_file.*(not known|getaddrinfo failed)"
    unresolvable_url = "http://test_notexist.wth/test_non_existent_file.txt"
    with pytest.raises(URLError, match=expected_message):
        get_file("non_existent_file.txt", unresolvable_url)


def test_get_file_404_file_not_found():
    """Fetching a missing remote file must raise HTTPError with code 404.

    The raised error's message has to mention the requested file name,
    the 404 status and the "Not Found" text, in that order.
    """
    missing_url = "http://google.com/test_non_existent_file.txt"
    expected_message = "test_non_existent_file.*404.*Not Found"
    with pytest.raises(HTTPError, match=expected_message) as excinfo:
        get_file("non_existent_file.txt", missing_url)
    raised = excinfo.value
    assert raised.code == 404
    # Suppress pytest.PytestUnraisableExceptionWarning:
    # Exception ignored while calling deallocator
    # This questionable design quirk comes from urllib.request.urlretrieve,
    # so we shouldn't shim around it.
    raised.close()
13 changes: 9 additions & 4 deletions ml_datasets/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,20 @@ def dl_progress(count, block_size, total_size):
else:
progbar.update(block_size)

error_msg = "URL fetch failure on {}: {} -- {}"
if not os.path.exists(fpath):
try:
try:
urlretrieve(origin, fpath, dl_progress)
except URLError as e:
raise Exception(error_msg.format(origin, e.errno, e.reason))
# Enrich download exceptions with full file name
# HTTPError is a subclass of URLError, so it must be caught first
except HTTPError as e:
raise Exception(error_msg.format(origin, e.code, e.msg))
error_msg = "URL fetch failure on {} : {} -- {}"
e.msg = error_msg.format(origin, e.code, e.msg)
raise
except URLError as e:
error_msg = "URL fetch failure on {} -- {}"
e.reason = error_msg.format(origin, e.reason)
raise
except (Exception, KeyboardInterrupt):
if os.path.exists(fpath):
os.remove(fpath)
Expand Down