3 changes: 2 additions & 1 deletion bot/exts/filtering/_filter_lists/domain.py
@@ -1,5 +1,6 @@
 import re
 import typing
+import urllib.parse
 
 from bot.exts.filtering._filter_context import Event, FilterContext
 from bot.exts.filtering._filter_lists.filter_list import FilterList, ListType
@@ -51,7 +52,7 @@ async def actions_for(
             return None, [], {}
 
         text = clean_input(text)
-        urls = {match.group(1).lower().rstrip("/") for match in URL_RE.finditer(text)}
+        urls = {urllib.parse.unquote(match.group(1)).lower().rstrip("/") for match in URL_RE.finditer(text)}
         new_ctx = ctx.replace(content=urls)
 
         triggers = await self[ListType.DENY].filter_list_result(new_ctx)
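Reviewer's note (not part of the diff): the `unquote` call closes a bypass where a percent-encoded dot hides a deny-listed domain from the comparison. A minimal sketch of the before/after behaviour, using a simplified stand-in for the bot's `URL_RE`:

```python
import re
import urllib.parse

# Simplified stand-in for the bot's URL_RE; the real pattern lives in the repo.
URL_RE = re.compile(r"(?:https?://)?(\S+\.\S+)")

text = "join us at https://scam%2Eexample.com/login"

# Without decoding: the encoded dot survives, so a deny-listed
# "scam.example.com" never matches this set.
raw = {m.group(1).lower().rstrip("/") for m in URL_RE.finditer(text)}
# With this change: unquote() decodes %2E back to ".", restoring the domain.
decoded = {urllib.parse.unquote(m.group(1)).lower().rstrip("/") for m in URL_RE.finditer(text)}

print(raw)      # {'scam%2eexample.com/login'}
print(decoded)  # {'scam.example.com/login'}
```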
8 changes: 5 additions & 3 deletions bot/exts/filtering/_filters/domain.py
@@ -1,4 +1,5 @@
 import re
+import urllib.parse
 from typing import ClassVar
 from urllib.parse import urlparse
 
@@ -36,11 +37,11 @@ class DomainFilter(Filter):
 
     async def triggered_on(self, ctx: FilterContext) -> bool:
         """Searches for a domain within a given context."""
-        domain = tldextract.extract(self.content).registered_domain.lower()
+        domain = tldextract.extract(self.content).top_domain_under_public_suffix.lower()
 
         for found_url in ctx.content:
             extract = tldextract.extract(found_url)
-            if self.content.lower() in found_url and extract.registered_domain == domain:
+            if self.content.lower() in found_url and extract.top_domain_under_public_suffix == domain:
                 if self.extra_fields.only_subdomains:
                     if not extract.subdomain and not urlparse(f"https://{found_url}").path:
                         return False
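For context (a hedged sketch, not from the diff): `top_domain_under_public_suffix` is the newer tldextract name for the value `registered_domain` used to return, i.e. the domain directly under the public suffix. Roughly how the values feeding the `only_subdomains` early return look:

```python
import tldextract

extract = tldextract.extract("https://files.pythondiscord.com/some/path")
print(extract.subdomain)                       # "files"
print(extract.top_domain_under_public_suffix)  # "pythondiscord.com"

# A bare registrable domain has no subdomain and no path, which is the case
# the `only_subdomains` branch rejects with `return False` above.
bare = tldextract.extract("pythondiscord.com")
print(bare.subdomain)                          # "" (falsy)
print(bare.top_domain_under_public_suffix)     # "pythondiscord.com"
```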
@@ -59,4 +60,5 @@ async def process_input(cls, content: str, description: str) -> tuple[str, str]:
         match = URL_RE.fullmatch(content)
         if not match or not match.group(1):
             raise BadArgument(f"`{content}` is not a URL.")
-        return match.group(1), description
+        unquoted_content = urllib.parse.unquote(match.group(1))
+        return unquoted_content, description
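One more sketch (an illustration, not part of the diff): decoding in `process_input` keeps the stored filter entry and the URLs extracted in `actions_for` in the same normalised form, so encoded and plain spellings of the same domain compare equal:

```python
import urllib.parse

stored = urllib.parse.unquote("discord%2Egg/evil")          # what process_input now stores
found = urllib.parse.unquote("discord%2egg/evil").lower()   # the actions_for side
print(stored.lower() == found)  # True: both normalise to "discord.gg/evil"
```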