From 472f53999219986dd9b67d987b2fc4b269959dc1 Mon Sep 17 00:00:00 2001 From: Boris Muratov <8bee278@gmail.com> Date: Fri, 12 Dec 2025 23:11:42 +0200 Subject: [PATCH 1/2] Support unicode URLs in domain filtering --- bot/exts/filtering/_filter_lists/domain.py | 3 ++- bot/exts/filtering/_filters/domain.py | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/bot/exts/filtering/_filter_lists/domain.py b/bot/exts/filtering/_filter_lists/domain.py index e601f6922d..cbaeceb669 100644 --- a/bot/exts/filtering/_filter_lists/domain.py +++ b/bot/exts/filtering/_filter_lists/domain.py @@ -1,5 +1,6 @@ import re import typing +import urllib.parse from bot.exts.filtering._filter_context import Event, FilterContext from bot.exts.filtering._filter_lists.filter_list import FilterList, ListType @@ -51,7 +52,7 @@ async def actions_for( return None, [], {} text = clean_input(text) - urls = {match.group(1).lower().rstrip("/") for match in URL_RE.finditer(text)} + urls = {urllib.parse.unquote(match.group(1)).lower().rstrip("/") for match in URL_RE.finditer(text)} new_ctx = ctx.replace(content=urls) triggers = await self[ListType.DENY].filter_list_result(new_ctx) diff --git a/bot/exts/filtering/_filters/domain.py b/bot/exts/filtering/_filters/domain.py index c3f7f28865..1cd4502014 100644 --- a/bot/exts/filtering/_filters/domain.py +++ b/bot/exts/filtering/_filters/domain.py @@ -1,4 +1,5 @@ import re +import urllib.parse from typing import ClassVar from urllib.parse import urlparse @@ -59,4 +60,5 @@ async def process_input(cls, content: str, description: str) -> tuple[str, str]: match = URL_RE.fullmatch(content) if not match or not match.group(1): raise BadArgument(f"`{content}` is not a URL.") - return match.group(1), description + unquoted_content = urllib.parse.unquote(match.group(1)) + return unquoted_content, description From 5b81dbe53c3eff207b28d85d7732557060c74304 Mon Sep 17 00:00:00 2001 From: Boris Muratov <8bee278@gmail.com> Date: Fri, 12 Dec 2025 23:15:52 +0200 Subject: [PATCH 2/2] Replace deprecated tldextract registered_domain --- bot/exts/filtering/_filters/domain.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bot/exts/filtering/_filters/domain.py b/bot/exts/filtering/_filters/domain.py index 1cd4502014..c5f1d6880c 100644 --- a/bot/exts/filtering/_filters/domain.py +++ b/bot/exts/filtering/_filters/domain.py @@ -37,11 +37,11 @@ class DomainFilter(Filter): async def triggered_on(self, ctx: FilterContext) -> bool: """Searches for a domain within a given context.""" - domain = tldextract.extract(self.content).registered_domain.lower() + domain = tldextract.extract(self.content).top_domain_under_public_suffix.lower() for found_url in ctx.content: extract = tldextract.extract(found_url) - if self.content.lower() in found_url and extract.registered_domain == domain: + if self.content.lower() in found_url and extract.top_domain_under_public_suffix == domain: if self.extra_fields.only_subdomains: if not extract.subdomain and not urlparse(f"https://{found_url}").path: return False