From f09279fa4dfdfa3fe791082b9950ccc92272d6af Mon Sep 17 00:00:00 2001 From: Mark Southern Date: Tue, 3 Jun 2025 17:35:52 -0700 Subject: [PATCH 1/2] support parameters in data urls --- data_url/__init__.py | 23 ++++++++++++++++++++--- test/test_url.py | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/data_url/__init__.py b/data_url/__init__.py index df2f304..2e0082f 100644 --- a/data_url/__init__.py +++ b/data_url/__init__.py @@ -1,5 +1,6 @@ import re import base64 +from urllib.parse import unquote, quote DATA_URL_RE = re.compile( r""" @@ -39,7 +40,7 @@ def construct_data_url(mime_type, base64_encoded, data): return data_url.url class DataURL: - URL_FORMAT = "data:{mime_type}{encoded},{data}" + URL_FORMAT = "data:{mime_type}{parameters}{encoded},{data}" ENCODING_STRING = ";base64" @classmethod @@ -119,6 +120,14 @@ def __parse_url(self): if match: self._is_base64_encoded = match.group('encoded') is not None self._mime_type = match.group("MIME") or "" + params = match.group("parameters") + if params: + self._parameters = {} + for pair in params.split(";"): + if pair: + name, value = pair.split("=", 1) + self._parameters[name] = unquote(value) + raw_data = match.group('data') if self._is_base64_encoded: self._data = base64.b64decode(raw_data) @@ -130,8 +139,9 @@ def __parse_url(self): def __construct_url(self): """Constructs an actual data URL string from class attributes.""" return self.URL_FORMAT.format( - mime_type=self._mime_type, - encoded=self.ENCODING_STRING if self._is_base64_encoded else "", + mime_type=self.mime_type, + parameters=";" + ";".join([f"{name}={quote(value)}" for name, value in self.parameters.items()]) if self.parameters else "", + encoded=self.ENCODING_STRING if self.is_base64_encoded else "", data=self.encoded_data ) @@ -164,3 +174,10 @@ def encoded_data(self): if self._is_base64_encoded: return base64.b64encode(self._data).decode('utf-8') return self._data + + @property + def parameters(self): + """Attribute / Value parameters.""" + if not hasattr(self, '_parameters'): + self._parameters = {} + return self._parameters diff --git a/test/test_url.py b/test/test_url.py index 43110ed..487fd84 100644 --- a/test/test_url.py +++ b/test/test_url.py @@ -5,6 +5,9 @@ from data_url import * class TestUrlCreation(unittest.TestCase): + example_url = "data:image/png;hello=world;name=two%20words;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" + example_data = "iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" + # TODO tests with characters that need percent encoding def test_construct_data_url_string(self): mime_type = "text/plain" @@ -64,6 +67,19 @@ def test_non_compliant_url(self): url = DataURL.from_url("not a url") assert url is None + def test_url_with_parameters(self): + url = DataURL.from_url(self.example_url) + self.assertEqual(url.mime_type, "image/png") + self.assertEqual(url.is_base64_encoded, True) + self.assertDictEqual(url.parameters, {"hello": "world", "name": "two words"}) + + def test_url_assembly_with_parameters(self): + url = DataURL.from_data("image/png", True, self.example_data) + url.parameters["hello"] = "world" + url.parameters["name"] = "two words" + self.assertEqual(str(url), self.example_url) + + class TestFromData(unittest.TestCase): def test_typing(self): with self.assertRaises(Exception) as context: @@ -172,3 +188,10 @@ def run_assertions(self): self.assertEqual(self.url.is_base64_encoded, self.base64_encoded) self.assertEqual(self.expected_url, self.url.url) + +class TestFromUrl(unittest.TestCase): + def test_from_urls(self): + test_str = " asdf data:image/png;charset=USASCII;name=file.png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg== afasdfasd\n\n" + url = DataURL.from_url(test_str) + self.assertEqual(len(url.parameters), 2) + self.assertDictEqual(url.parameters, {"charset": "USASCII", "name": "file.png"}) From c55cb831cdaee1bd023d4062b5b8a560309b27a9 Mon Sep 17 00:00:00 2001 From: Mark Southern Date: Thu, 12 Jun 2025 21:31:06 -0700 Subject: [PATCH 2/2] update test_from_urls accounting for re.fullmatch --- test/test_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_url.py b/test/test_url.py index 487fd84..f03813e 100644 --- a/test/test_url.py +++ b/test/test_url.py @@ -191,7 +191,7 @@ def run_assertions(self): class TestFromUrl(unittest.TestCase): def test_from_urls(self): - test_str = " asdf data:image/png;charset=USASCII;name=file.png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg== afasdfasd\n\n" + test_str = "data:image/png;charset=USASCII;name=file.png;base64,iVBORw0KGgoAAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==" url = DataURL.from_url(test_str) self.assertEqual(len(url.parameters), 2) self.assertDictEqual(url.parameters, {"charset": "USASCII", "name": "file.png"})