Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions dandi/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
get_utcnow_datetime,
is_page2_url,
is_same_time,
is_url,
on_windows,
post_upload_size_check,
under_paths,
Expand Down Expand Up @@ -589,3 +590,67 @@ def test_post_upload_size_check_erroring(
logging.ERROR,
f"Size of {p} was 42 at start of upload but is now 19 after upload",
) in caplog.record_tuples


class TestIsUrl:
    """Check `is_url` against strings it should accept and strings it should reject."""

    # Inputs for which `is_url` is expected to return `True`
    ACCEPTED = [
        # Ordinary, well-formed HTTP(S) and FTP URLs
        "http://example.com",
        "https://example.com",
        "https://example.com/path",
        "https://example.com/path?query=1#frag",
        "https://example.com:8443/path",
        "http://127.0.0.1:8000",
        "ftp://example.com/path/file.txt",
        "ftp://user:pass@example.com/dir",
        # Accepted by pydantic validation even though it is not a very useful URL
        "http:/example.com",
        # Dandiset pages on the DANDI Archive (these are plain HTTP URLs as well)
        "https://dandiarchive.org/dandiset/000027",
        "https://dandiarchive.org/dandiset/000027/draft",
        "https://dandiarchive.org/dandiset/000027/0.210428.2206",
        # DANDI identifiers, with and without a version
        "DANDI:123456",
        "DANDI:123456/draft",
        "DANDI:123456/1.123456.1234",
        "DANDI-SANDBOX:123456",
        "DANDI-SANDBOX:123456/draft",
        "DANDI-SANDBOX:123456/1.123456.1234",
        # URLs using the custom `dandi://` scheme
        "dandi://dandi/123456",
        "dandi://dandi/123456/draft",
        "dandi://dandi/123456/1.123456.1234",
        "dandi://dandi-sandbox/123456",
        "dandi://dandi-sandbox/123456/draft",
        "dandi://dandi-sandbox/123456/1.123456.1234",
    ]

    # Inputs for which `is_url` is expected to return `False`
    REJECTED = [
        # Strings that are plainly not URLs
        "not a url",
        "example",
        "example .com",
        "://example.com",
        "",
        " ",
        # Looks like a DANDI URL but must not be accepted as one
        "dandi://not-a-real-dandiset",
        # DANDI-style identifiers whose instance name is unknown
        "FAKEDANDI:123456",
        "FAKEDANDI:123456/draft",
        "FAKEDANDI:123456/1.123456.1234",
        # `dandi://` URLs whose instance name is unknown
        "dandi://fakedandi/123456",
        "dandi://fakedandi/123456/draft",
        "dandi://fakedandi/123456/1.123456.1234",
    ]

    @pytest.mark.parametrize("s", ACCEPTED)
    def test_valid_urls(self, s: str) -> None:
        """`is_url` must return exactly `True` for every accepted input."""
        assert is_url(s) is True

    @pytest.mark.parametrize("s", REJECTED)
    def test_invalid_urls(self, s: str) -> None:
        """`is_url` must return exactly `False` for every rejected input."""
        assert is_url(s) is False
35 changes: 29 additions & 6 deletions dandi/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import dateutil.parser
from multidict import MultiDict # dependency of yarl
from pydantic import BaseModel, Field
from pydantic import AnyHttpUrl, BaseModel, Field, FtpUrl, TypeAdapter, ValidationError
import requests
import ruamel.yaml
from semantic_version import Version
Expand Down Expand Up @@ -648,13 +648,36 @@ def _get_instance(
)


# The adapter is built once at module level because constructing a
# TypeAdapter on every call is expensive
_url_adapter: TypeAdapter[AnyHttpUrl | FtpUrl] = TypeAdapter(AnyHttpUrl | FtpUrl)


def is_url(s: str) -> bool:
    """
    Determine whether the input string `s` is a valid URL.

    `s` counts as a URL when it either validates as a standard URL
    (`AnyHttpUrl` or `FtpUrl`, checked through pydantic) or is accepted by
    `parse_dandi_url` as a DANDI URL.

    :param s: the string to examine
    :return: `True` if `s` is a standard URL or a DANDI URL, `False` otherwise
    """

    # Imported inside the function to avoid possible circular imports,
    # since this is a utility module
    from dandi.dandiarchive import parse_dandi_url
    from dandi.exceptions import UnknownURLError

    try:
        _url_adapter.validate_python(s)
    except ValidationError:
        # `s` is not a standard URL; fall through to the DANDI URL check
        pass
    else:
        # `s` is a standard URL
        return True

    try:
        parse_dandi_url(s)
    except UnknownURLError:
        # `s` is neither a standard URL nor a DANDI URL
        return False

    # `s` is a DANDI URL
    return True


def get_module_version(module: str | types.ModuleType) -> str | None:
Expand Down
Loading