From 4c94f3216f04a8e67a2df0b593e34888e0594484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20D=C3=ADaz?= Date: Wed, 10 Dec 2025 13:41:49 +0100 Subject: [PATCH] Change python re dependency to google's re2 --- oonipipeline/pyproject.toml | 3 ++- oonipipeline/src/oonipipeline/fingerprintdb.py | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/oonipipeline/pyproject.toml b/oonipipeline/pyproject.toml index 1c8f92aa..07272997 100644 --- a/oonipipeline/pyproject.toml +++ b/oonipipeline/pyproject.toml @@ -32,7 +32,8 @@ dependencies = [ "opentelemetry-exporter-otlp-proto-grpc ~= 1.29.0", "uvicorn ~= 0.25.0", "pydantic-settings ~= 2.4.0", - "apache-airflow == 2.10.4" + "apache-airflow == 2.10.4", + "google-re2==1.1.20251105" ] [project.optional-dependencies] diff --git a/oonipipeline/src/oonipipeline/fingerprintdb.py b/oonipipeline/src/oonipipeline/fingerprintdb.py index 217e8562..f8e3b61d 100644 --- a/oonipipeline/src/oonipipeline/fingerprintdb.py +++ b/oonipipeline/src/oonipipeline/fingerprintdb.py @@ -1,4 +1,4 @@ -import re +import re2 import csv from pathlib import Path @@ -24,8 +24,7 @@ class Fingerprint: scope: Optional[str] = "" notes: Optional[str] = "" expected_countries: Optional[List[str]] = field(default_factory=list) - - regexp: Optional[re.Pattern] = None + regexp: Optional[re2._Regexp] = None def matches_pattern(self, s: str) -> bool: if self.pattern_type == "full": @@ -41,7 +40,7 @@ def matches_pattern(self, s: str) -> bool: assert ( self.regexp is not None ), "regexp is not set for a regexp type pattern" - return self.regexp.search(s) != None + return self.regexp.search(s) is not None raise Exception( f"Found unknown fingerprint matching pattern {self.pattern_type}" @@ -74,7 +73,9 @@ def _load_fingerprints(filepath: Path) -> Dict[str, Fingerprint]: for row in csv_reader: fp = Fingerprint(**row) if fp.pattern_type == "regexp": - fp.regexp = re.compile(fp.pattern, re.DOTALL) + options = re2.Options() + options.dot_nl = True + fp.regexp = re2.compile(fp.pattern, options) fingerprints[fp.name] = fp return fingerprints