Skip to content

Commit 22f70be

Browse files
committed
Ignore data: URIs
1 parent 3adef4d commit 22f70be

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

CHANGELOG

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
Unreleased
2+
3+
* Ignore `data:` URIs.
4+
15
2.4.0 (2025-09-28)
26

37
* Add index to Link (David Venhoff, #202)

linkcheck/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def handle_starttag(self, tag, attrs):
3232
self.text += f' [image:{src[0]}] '
3333

3434
def handle_endtag(self, tag):
35-
if tag == 'a' and self.in_a:
35+
if tag == 'a' and self.in_a and not self.url.startswith("data:"):
3636
self.urls.append((self.text[:256], self.url))
3737
self.in_a = False
3838
self.text = ''
@@ -48,7 +48,7 @@ class ImageLister(Lister):
4848
def handle_starttag(self, tag, attrs):
4949
if tag == 'img':
5050
src = [v for k, v in attrs if k == 'src']
51-
if src:
51+
if src and not src[0].startswith("data:"):
5252
self.urls.append(('', src[0]))
5353

5454

linkcheck/tests/test_linkcheck.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,19 @@ def test_urls_exceeding_max_length(self):
950950
)
951951
self.assertEqual(Url.objects.all().count(), 1)
952952

953+
def test_data_urls_ignored(self):
954+
self.assertEqual(Url.objects.all().count(), 0)
955+
Book.objects.create(
956+
title="My Title",
957+
description=(
958+
'This is a normal link: <a href="https://www.example.org">Example</a>, '
959+
'This is a data link: <a href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAgAAZABkAAD">Example 2</a>, '
960+
'This is a data img: <img src="data:image/jpeg;base64,/9j/4AAQGffsbRR4AAZABkAAD">'
961+
)
962+
)
963+
# Only the normal link is extracted
964+
self.assertEqual(Url.objects.all().count(), 1)
965+
953966
def test_empty_url_field(self):
954967
"""
955968
Test that URLField empty content is excluded depending on ignore_empty list.

0 commit comments

Comments
 (0)