Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion oonipipeline/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ dependencies = [
"opentelemetry-exporter-otlp-proto-grpc ~= 1.29.0",
"uvicorn ~= 0.25.0",
"pydantic-settings ~= 2.4.0",
"apache-airflow == 2.10.4"
"apache-airflow == 2.10.4",
"google-re2==1.1.20251105"
]

[project.optional-dependencies]
Expand Down
11 changes: 6 additions & 5 deletions oonipipeline/src/oonipipeline/fingerprintdb.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import re
import re2
import csv

from pathlib import Path
Expand All @@ -24,8 +24,7 @@ class Fingerprint:
scope: Optional[str] = ""
notes: Optional[str] = ""
expected_countries: Optional[List[str]] = field(default_factory=list)

regexp: Optional[re.Pattern] = None
regexp: Optional[re2._Regexp] = None

def matches_pattern(self, s: str) -> bool:
if self.pattern_type == "full":
Expand All @@ -41,7 +40,7 @@ def matches_pattern(self, s: str) -> bool:
assert (
self.regexp is not None
), "regexp is not set for a regexp type pattern"
return self.regexp.search(s) != None
return self.regexp.search(s) is not None

raise Exception(
f"Found unknown fingerprint matching pattern {self.pattern_type}"
Expand Down Expand Up @@ -74,7 +73,9 @@ def _load_fingerprints(filepath: Path) -> Dict[str, Fingerprint]:
for row in csv_reader:
fp = Fingerprint(**row)
if fp.pattern_type == "regexp":
fp.regexp = re.compile(fp.pattern, re.DOTALL)
options = re2.Options()
options.dot_nl = True
fp.regexp = re2.compile(fp.pattern, options)
fingerprints[fp.name] = fp
return fingerprints

Expand Down