From 5c3c6c67823f53f636df7a929807f67181d4aaed Mon Sep 17 00:00:00 2001
From: Mark Southern <msouthern@neurocrine.com>
Date: Tue, 3 Jun 2025 17:24:53 -0700
Subject: [PATCH 1/2] Rewrite data URL regex; return None for unparsable URLs

---
 data_url/__init__.py | 34 +++++++++++++++++++++++-----------
 test/test_url.py     |  4 ++++
 2 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/data_url/__init__.py b/data_url/__init__.py
index a8617e8..61e25ce 100644
--- a/data_url/__init__.py
+++ b/data_url/__init__.py
@@ -2,9 +2,17 @@
 import base64
 
 DATA_URL_RE = re.compile(
-    r"data:(?P<MIME>([\w-]+\/[\w+\.-]+(;[\w-]+\=[\w-]+)?)?)(?P<encoded>;base64)?,(?P<data>[\w\d.~%\=\/\+-]+)"
+    r"""
+    data:                                         # literal data:
+    (?P<MIME>[\w\-\.+]+/[\w\-\.+]+)?              # optional media type
+    (?P<parameters>(?:;[\w\-\.+]+=[\w\-\.+%]+)*)  # optional attribute=values, value can be url encoded
+    (?P<encoded>;base64)?,                        # optional base64 flag
+    (?P<data>[\w\d.~%\=\/\+-]+)                   # the data
+    """,
+    re.VERBOSE  # MULTILINE dropped: the pattern has no ^/$ anchors for it to affect
 )
 
+
 def construct_data_url(mime_type, base64_encoded, data):
     """
     Helper method for just creating a data URL from some data. If this
@@ -47,8 +55,9 @@ def from_url(cls, url):
         """
         data_url = cls()
         data_url._url = url
-        data_url.__parse_url()
-        return data_url
+        if data_url.__parse_url():
+            return data_url
+        return None
 
     @classmethod
     def from_data(cls, mime_type, base64_encoded, data):
@@ -106,14 +115,17 @@ def from_byte_data(cls, mime_type, data):
     def __parse_url(self):
         """Parses a data URL to get each individual element and sets the
         respecting class attributes."""
-        match = DATA_URL_RE.fullmatch(self._url)
-        self._is_base64_encoded = match.group('encoded') is not None
-        self._mime_type = match.group("MIME")
-        raw_data = match.group('data')
-        if self._is_base64_encoded:
-            self._data = base64.b64decode(raw_data)
-        else:
-            self._data = raw_data
+        match = DATA_URL_RE.search(self._url)
+        if match:
+            self._is_base64_encoded = match.group('encoded') is not None
+            self._mime_type = match.group("MIME") or ""
+            raw_data = match.group('data')  # payload text after the comma
+            if self._is_base64_encoded:
+                self._data = base64.b64decode(raw_data)  # base64-decoded payload
+            else:
+                self._data = raw_data  # stored exactly as matched
+            return True  # matched; attributes were populated
+        return False  # no match; lets from_url() return None
 
     def __construct_url(self):
         """Constructs an actual data URL string from class attributes."""
diff --git a/test/test_url.py b/test/test_url.py
index f9635d5..43110ed 100644
--- a/test/test_url.py
+++ b/test/test_url.py
@@ -60,6 +60,10 @@ def test_construct_data_url(self):
         self.assertEqual(raw_data, deconstructed_url.data)
         self.assertEqual(data, deconstructed_url.encoded_data)
 
+    def test_non_compliant_url(self):
+        url = DataURL.from_url("not a url")
+        self.assertIsNone(url)  # from_url() yields None when the URL fails to parse
+
 class TestFromData(unittest.TestCase):
     def test_typing(self):
         with self.assertRaises(Exception) as context:

From 2a93a001912020577ec8376c7f638d8f4ed4dd78 Mon Sep 17 00:00:00 2001
From: Mark Southern <msouthern@neurocrine.com>
Date: Thu, 12 Jun 2025 21:27:57 -0700
Subject: [PATCH 2/2] more explicit mimetype type and subtype, revert re.search
 to re.fullmatch

---
 data_url/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/data_url/__init__.py b/data_url/__init__.py
index 61e25ce..df2f304 100644
--- a/data_url/__init__.py
+++ b/data_url/__init__.py
@@ -4,7 +4,7 @@
 DATA_URL_RE = re.compile(
     r"""
     data:                                         # literal data:
-    (?P<MIME>[\w\-\.+]+/[\w\-\.+]+)?              # optional media type
+    (?P<MIME>[a-z][a-z0-9\-]+/[a-z][\w\-\.\+]+)?  # optional media type
     (?P<parameters>(?:;[\w\-\.+]+=[\w\-\.+%]+)*)  # optional attribute=values, value can be url encoded
     (?P<encoded>;base64)?,                        # optional base64 flag
     (?P<data>[\w\d.~%\=\/\+-]+)                   # the data
@@ -115,7 +115,7 @@ def from_byte_data(cls, mime_type, data):
     def __parse_url(self):
         """Parses a data URL to get each individual element and sets the
         respecting class attributes."""
-        match = DATA_URL_RE.search(self._url)
+        match = DATA_URL_RE.fullmatch(self._url)
         if match:
             self._is_base64_encoded = match.group('encoded') is not None
             self._mime_type = match.group("MIME") or ""