From ca3b52c564a9ddb4b44e0be0d844220f9c37ff1d Mon Sep 17 00:00:00 2001 From: jgberry Date: Fri, 14 Nov 2025 03:15:14 -0500 Subject: [PATCH 1/4] Fix from_url --- data_url/__init__.py | 2 +- test/test_url.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/data_url/__init__.py b/data_url/__init__.py index 5b7df34..d41cddf 100644 --- a/data_url/__init__.py +++ b/data_url/__init__.py @@ -132,7 +132,7 @@ def __parse_url(self) -> bool: if self._is_base64_encoded: self._data = base64.b64decode(raw_data) else: - self._data = raw_data + self._data = unquote(raw_data) return True return False diff --git a/test/test_url.py b/test/test_url.py index f03813e..1ac0632 100644 --- a/test/test_url.py +++ b/test/test_url.py @@ -79,6 +79,12 @@ def test_url_assembly_with_parameters(self): url.parameters["name"] = "two words" self.assertEqual(str(url), self.example_url) + def test_url_with_url_escaped_encoding(self): + url = DataURL.from_url("data:,A%20brief%20note") + self.assertEqual("", url.mime_type) + self.assertEqual(False, url.is_base64_encoded) + self.assertEqual("A brief note", url.data) + class TestFromData(unittest.TestCase): def test_typing(self): From 55a85f2b1f33cff30d36ce48a8cd90a79dc178d8 Mon Sep 17 00:00:00 2001 From: jgberry Date: Fri, 14 Nov 2025 03:30:32 -0500 Subject: [PATCH 2/4] Quote data --- data_url/__init__.py | 2 +- test/test_url.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/data_url/__init__.py b/data_url/__init__.py index d41cddf..8d5146a 100644 --- a/data_url/__init__.py +++ b/data_url/__init__.py @@ -173,7 +173,7 @@ def encoded_data(self) -> str: """The encoded data of the URL""" if self._is_base64_encoded: return base64.b64encode(self._data).decode('utf-8') - return self._data + return quote(self._data) @property def parameters(self) -> Dict[str, str]: diff --git a/test/test_url.py b/test/test_url.py index 1ac0632..fe61a63 100644 --- a/test/test_url.py +++ b/test/test_url.py @@ -160,6 +160,17 @@ def test_string_with_plus_mimetype(self): self.assertEqual(type(self.url.data), str) self.run_assertions() + def test_string_with_spaces(self): + self.mime_type = "image/some+other" + self.base64_encoded = False + self.data = "A brief note" + self.raw_data = self.data + self.expected_url = f"data:{self.mime_type},A%20brief%20note" + self.url = DataURL.from_data(self.mime_type, self.base64_encoded, self.data) + + self.assertEqual(type(self.url.data), str) + self.run_assertions() + def run_assertions(self): self.assertEqual(self.url.mime_type, self.mime_type) self.assertEqual(self.url.data, self.raw_data) @@ -181,6 +192,17 @@ def test_byte_data(self): self.assertEqual(type(self.url.data), bytes) self.run_assertions() + def test_string_with_spaces(self): + self.mime_type = "text/plain" + self.base64_encoded = True + self.raw_data = b"A brief note" + self.data = base64.b64encode(self.raw_data).decode("UTF-8") + self.expected_url = f"data:{self.mime_type};base64,{self.data}" + + self.url = DataURL.from_byte_data(self.mime_type, self.raw_data) + self.assertEqual(type(self.url.data), bytes) + self.run_assertions() + def test_typing(self): with self.assertRaises(Exception) as context: DataURL.from_byte_data("type", "string") From be72c0a1e8817b9cba784ab4825c50a5721a1aea Mon Sep 17 00:00:00 2001 From: jgberry Date: Wed, 19 Nov 2025 00:06:12 -0500 Subject: [PATCH 3/4] Fixes --- data_url/__init__.py | 42 +++++++++++++++++++++++++++++------------- test/test_url.py | 34 +++++++++++++--------------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/data_url/__init__.py b/data_url/__init__.py index 8d5146a..e71c83a 100644 --- a/data_url/__init__.py +++ b/data_url/__init__.py @@ -89,13 +89,17 @@ def from_data(cls, mime_type: str, base64_encoded: bool, data: str) -> 'DataURL' return data_url @classmethod - def from_byte_data(cls, mime_type: str, data: bytes) -> 'DataURL': - """Create a new data URL from a mime type and byte data. + def from_byte_data(cls, mime_type: str, data: Union[str, bytes]) -> 'DataURL': + """Create a new data URL from a mime type and raw data. - This method works similarly to from_data, however because the data is bytes type it will - automatically turn on base64 encoding. It also assumes that the data is not already - base64 encoded. If you have base64 encoded bytes convert them to a string then - use the `from_data` method. + If the data is a bytes type, then a data URL is constructed from the data's + base64 encoded representation. This assumes the data is not already base64 + encoded. If you have base64 encoded bytes, convert them to a string and use the + `from_data` method instead. + + If the data is a string type, then a data URL is constructed from the data's URL + encoded representation. This assumes the data is not already URL encoded. If you + have URL encoded data, use `from_data` instead. Args: mime_type (str) @@ -103,16 +107,21 @@ def from_byte_data(cls, mime_type: str, data: bytes) -> 'DataURL': Returns: DataURL: A new DataURL object. """ - if type(data) != bytes: - raise TypeError('Data must be a bytes type') - data_url = cls() data_url._mime_type = mime_type - data_url._is_base64_encoded = True - data_url._data = data + + if type(data) == str: + data_url._is_base64_encoded = False + data_url._data = quote(data) + elif type(data) == bytes: + data_url._is_base64_encoded = True + data_url._data = data + else: + raise TypeError('Data must be a string or bytes type') return data_url + def __parse_url(self) -> bool: """Parses a data URL to get each individual element and sets the respecting class attributes.""" @@ -132,7 +141,7 @@ def __parse_url(self) -> bool: if self._is_base64_encoded: self._data = base64.b64decode(raw_data) else: - self._data = unquote(raw_data) + self._data = raw_data return True return False @@ -173,7 +182,14 @@ def encoded_data(self) -> str: """The encoded data of the URL""" if self._is_base64_encoded: return base64.b64encode(self._data).decode('utf-8') - return quote(self._data) + return self._data + + @property + def decoded_data(self) -> Union[str, bytes]: + """The decoded data from the URL""" + if self._is_base64_encoded: + return self._data + return unquote(self._data) @property def parameters(self) -> Dict[str, str]: diff --git a/test/test_url.py b/test/test_url.py index fe61a63..95c0c8b 100644 --- a/test/test_url.py +++ b/test/test_url.py @@ -83,7 +83,7 @@ def test_url_with_url_escaped_encoding(self): url = DataURL.from_url("data:,A%20brief%20note") self.assertEqual("", url.mime_type) self.assertEqual(False, url.is_base64_encoded) - self.assertEqual("A brief note", url.data) + self.assertEqual("A brief note", url.decoded_data) class TestFromData(unittest.TestCase): @@ -160,17 +160,6 @@ def test_string_with_plus_mimetype(self): self.assertEqual(type(self.url.data), str) self.run_assertions() - def test_string_with_spaces(self): - self.mime_type = "image/some+other" - self.base64_encoded = False - self.data = "A brief note" - self.raw_data = self.data - self.expected_url = f"data:{self.mime_type},A%20brief%20note" - self.url = DataURL.from_data(self.mime_type, self.base64_encoded, self.data) - - self.assertEqual(type(self.url.data), str) - self.run_assertions() - def run_assertions(self): self.assertEqual(self.url.mime_type, self.mime_type) self.assertEqual(self.url.data, self.raw_data) @@ -194,25 +183,28 @@ def test_byte_data(self): def test_string_with_spaces(self): self.mime_type = "text/plain" - self.base64_encoded = True - self.raw_data = b"A brief note" - self.data = base64.b64encode(self.raw_data).decode("UTF-8") - self.expected_url = f"data:{self.mime_type};base64,{self.data}" + self.base64_encoded = False + self.data = "A brief note" + self.raw_data = quote(self.data) + self.expected_url = f"data:{self.mime_type},{self.raw_data}" - self.url = DataURL.from_byte_data(self.mime_type, self.raw_data) - self.assertEqual(type(self.url.data), bytes) + self.url = DataURL.from_byte_data(self.mime_type, self.data) + self.assertEqual(type(self.url.data), str) self.run_assertions() def test_typing(self): with self.assertRaises(Exception) as context: - DataURL.from_byte_data("type", "string") + DataURL.from_byte_data("type", 1) - self.assertTrue('Data must be a bytes type' in str(context.exception)) + self.assertTrue('Data must be a string or bytes type' in str(context.exception)) def run_assertions(self): self.assertEqual(self.url.mime_type, self.mime_type) self.assertEqual(self.url.data, self.raw_data) - self.assertEqual(self.url.encoded_data, self.data) + if self.base64_encoded: + self.assertEqual(self.url.encoded_data, self.data) + else: + self.assertEqual(self.url.decoded_data, self.data) self.assertEqual(self.url.is_base64_encoded, self.base64_encoded) self.assertEqual(self.expected_url, self.url.url) From 9825cf1af6f4e3604ae7b230a85618b18d49d29d Mon Sep 17 00:00:00 2001 From: jgberry Date: Wed, 19 Nov 2025 00:07:43 -0500 Subject: [PATCH 4/4] Spacing --- data_url/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data_url/__init__.py b/data_url/__init__.py index e71c83a..2a36e55 100644 --- a/data_url/__init__.py +++ b/data_url/__init__.py @@ -121,7 +121,6 @@ def from_byte_data(cls, mime_type: str, data: Union[str, bytes]) -> 'DataURL': return data_url - def __parse_url(self) -> bool: """Parses a data URL to get each individual element and sets the respecting class attributes."""