diff --git a/fastpath/clickhouse_init.sql b/fastpath/clickhouse_init.sql index 9708a3eca..75162f103 100644 --- a/fastpath/clickhouse_init.sql +++ b/fastpath/clickhouse_init.sql @@ -38,7 +38,12 @@ CREATE TABLE IF NOT EXISTS default.fastpath `blocking_type` String, `test_helper_address` LowCardinality(String), `test_helper_type` LowCardinality(String), - `ooni_run_link_id` Nullable(UInt64) + `ooni_run_link_id` Nullable(UInt64), + `is_verified` Int8, + `nym` Nullable(String), + `zkp_request` Nullable(String), + `age_range` Nullable(String), + `msm_range` Nullable(String), ) ENGINE = ReplacingMergeTree ORDER BY (measurement_start_time, report_id, input) @@ -194,4 +199,3 @@ CREATE TABLE IF NOT EXISTS default.fingerprints_http ) ENGINE = EmbeddedRocksDB PRIMARY KEY name; - diff --git a/fastpath/debian/changelog b/fastpath/debian/changelog index abc6f6f16..b62827052 100644 --- a/fastpath/debian/changelog +++ b/fastpath/debian/changelog @@ -1,3 +1,9 @@ +fastpath (0.89) unstable; urgency=medium + + * Add support for new anonymous credentials database fields + + -- Luis Diaz Tue, 15 Sep 2026 16:25:00 +0100 + fastpath (0.88) unstable; urgency=medium * Remove fingerprints bytes metrics diff --git a/fastpath/fastpath/core.py b/fastpath/fastpath/core.py index a06ca72ce..161b030f7 100644 --- a/fastpath/fastpath/core.py +++ b/fastpath/fastpath/core.py @@ -1616,7 +1616,7 @@ def flag_measurements_with_wrong_date(msm: dict, msmt_uid: str, scores: dict) -> scores["msg"] = "Measurement start time too old" def write_measurement_to_disk(msm_tup) -> None: - """Write this measurement to disk so that it can be + """Write this measurement to disk so that it can be processed by the measurement uploader Args: @@ -1633,7 +1633,7 @@ def write_measurement_to_disk(msm_tup) -> None: msmtdir = spooldir / "incoming" / dirname msmtdir.mkdir(parents=True, exist_ok=True) - try: + try: msmt_f_tmp = msmtdir / f"{msmt_uid}.post.tmp" msmt_f_tmp.write_bytes(data) msmt_f = msmtdir / f"{msmt_uid}.post" @@ -1654,7 +1654,14 @@ def process_measurement(msm_tup, buffer_writes=False) -> None: assert msmt_uid if measurement is None: measurement = ujson.loads(msm_jstr) - if sorted(measurement.keys()) == ["content", "format"]: + + is_verified = g(measurement, 'is_verified', False) + nym = g(measurement, 'nym') + zkp_request = g(measurement, 'zkp_request') + age_range = g(measurement, 'age_range') + msm_range = g(measurement, 'msm_range') + + if "content" in measurement and "format" in measurement: measurement = unwrap_msmt(measurement) rid = measurement.get("report_id") inp = measurement.get("input") @@ -1742,6 +1749,11 @@ def process_measurement(msm_tup, buffer_writes=False) -> None: test_helper_address, test_helper_type, ooni_run_link_id, + is_verified, + nym, + zkp_request, + age_range, + msm_range, buffer_writes=buffer_writes, ) diff --git a/fastpath/fastpath/db.py b/fastpath/fastpath/db.py index e8decb6ce..9c115b56a 100644 --- a/fastpath/fastpath/db.py +++ b/fastpath/fastpath/db.py @@ -10,7 +10,7 @@ from datetime import datetime from textwrap import dedent from urllib.parse import urlparse -from typing import List, Tuple, Dict, Optional +from typing import List, Tuple, Dict, Optional, Any import logging try: @@ -209,6 +209,11 @@ def clickhouse_upsert_summary( test_helper_address: str, test_helper_type: str, ooni_run_link_id: Optional[int], + is_verified: bool, + zkp_request: Optional[str], + nym: Optional[str], + age_range: Optional[Tuple[int, int]], + msm_range: Optional[Tuple[int, int]], buffer_writes=False, ) -> None: """Insert a row in the fastpath table. Overwrite an existing one.""" @@ -224,6 +229,10 @@ def nn(features: dict, k: str) -> str: def tf(v: bool) -> str: return "t" if v else "f" + def serialize_optional(x : Optional[Any]) -> Optional[str]: + """Serialize to string if not None, return None otherwise""" + return ujson.dumps(x) if x is not None else None + test_name = msm.get("test_name", None) or "" input_, domain = extract_input_domain(msm, test_name) asn = int(msm["probe_asn"][2:]) # AS123 @@ -257,6 +266,11 @@ def tf(v: bool) -> str: test_helper_address=test_helper_address, test_helper_type=test_helper_type, ooni_run_link_id=ooni_run_link_id, + is_verified=tf(is_verified), + nym=nym, + zkp_request=zkp_request, + age_range=serialize_optional(age_range), + msm_range=serialize_optional(msm_range) ) if buffer_writes: diff --git a/fastpath/fastpath/tests/test_functional_nodb.py b/fastpath/fastpath/tests/test_functional_nodb.py index 80b658c7f..5af5b33b0 100644 --- a/fastpath/fastpath/tests/test_functional_nodb.py +++ b/fastpath/fastpath/tests/test_functional_nodb.py @@ -154,6 +154,11 @@ def test_score_web_connectivity_bug_610_2(fprints): "test_helper_address": "https://0.th.ooni.org", "test_helper_type": "https", "ooni_run_link_id": None, + "is_verified" : "f", + "nym" : None, + "zkp_request" : None, + "age_range" : None, + "msm_range" : None, } ] @@ -201,6 +206,11 @@ def test_score_browser_web(fprints): "test_runtime": 0.35740000000037253, "test_start_time": datetime.datetime(2023, 3, 20, 18, 26, 35), "test_version": "0.1.0", + "is_verified" : "f", + "nym" : None, + "zkp_request" : None, + "age_range" : None, + "msm_range" : None, }, ] @@ -252,6 +262,11 @@ def test_score_openvpn(): "test_helper_address": "", "test_helper_type": "", "ooni_run_link_id": None, + "is_verified" : "f", + "nym" : None, + "zkp_request" : None, + "age_range" : None, + "msm_range" : None, } ] diff --git a/fastpath/makefile b/fastpath/makefile index 10f18430c..b0c87355c 100644 --- a/fastpath/makefile +++ b/fastpath/makefile @@ -80,18 +80,18 @@ docker: # Runs docker in foreground, useful for checking errors in the image before it runs docker-fg: - docker compose --profile default up --build + docker compose --profile default up --build -# Runs both fastpath and the testing clickhous. +# Runs both fastpath and the testing clickhous. # Mind the fastpath configuration in fastpath.conf docker-all: docker-clickhouse echo "Waiting for clickhouse..." - sleep 4 - docker compose --profile default up --build -d + sleep 4 + docker compose --profile default up --build -d # Turns off every service docker-down: - docker compose --profile all down + docker compose --profile all down # If you need to test the fastpath locally you can use this rule to spawn the clickhouse database # locally and then use `make docker` or `make docker-fg` to start the fastpath container. Ex: @@ -109,15 +109,15 @@ docker-login: # Get logs from the fastpath docker service docker-logs: - docker compose logs fastpath -f + docker compose logs fastpath -f -# Get logs for a specified service. Example: +# Get logs for a specified service. Example: # `make docker-logs-for args="clickhouse-server"` docker-logs-for: - docker compose logs $(args) -f + docker compose logs $(args) -f # Used for actually building the fastpath docker image -docker-build: +docker-build: # We need to use tar -czh to resolve the common dir symlink tar -czh . | docker build \ --build-arg BUILD_LABEL=${BUILD_LABEL} \ diff --git a/ooniapi/common/src/common/config.py b/ooniapi/common/src/common/config.py index dc51d1421..211dd2d6b 100644 --- a/ooniapi/common/src/common/config.py +++ b/ooniapi/common/src/common/config.py @@ -1,5 +1,6 @@ from typing import List, Dict from pydantic_settings import BaseSettings +from pydantic import Field class Settings(BaseSettings): @@ -38,7 +39,7 @@ class Settings(BaseSettings): # Where the geoip DBs are downloaded to geoip_db_dir: str = "/var/lib/ooni/geoip" - # Ooniprobe only + # -- < Ooniprobe only > ------------------------------------------------------------- msmt_spool_dir: str = "" fastpath_url: str = "" # example: http://123.123.123.123:8472 failed_reports_bucket: str = ( @@ -46,6 +47,20 @@ class Settings(BaseSettings): ) tor_targets: str = "" # filename of json containing Tor bridges and DirAuth endpoints + # Used to store history of all manifests and retrieve next manifest to use + anonc_manifest_bucket: str = Field( + default="CHANGEME", + description="Name of public bucket where anonymous credentials manifests are stored" + ) + anonc_manifest_file: str = Field( + default="CHANGEME", + description="Name of the manifest file within the public bucket to use for ZKP verification" + ) + anonc_secret_key: str = Field( + default="CHANGEME", + description="Secret key matching the specified manifest file" + ) + # ooniprobe client configuration collectors: List[Dict[str, str]] = [ {"address": "httpo://guegdifjy7bjpequ.onion", "type": "onion"}, diff --git a/ooniapi/services/oonimeasurements/tests/migrations/0_clickhouse_init_tables.sql b/ooniapi/services/oonimeasurements/tests/migrations/0_clickhouse_init_tables.sql index e5b9e5549..248ae8b03 100644 --- a/ooniapi/services/oonimeasurements/tests/migrations/0_clickhouse_init_tables.sql +++ b/ooniapi/services/oonimeasurements/tests/migrations/0_clickhouse_init_tables.sql @@ -65,6 +65,11 @@ ENGINE = MergeTree ORDER BY (report_id, input) SETTINGS index_granularity = 8192; +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `is_verified` Int8; +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `nym` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `zkp_request` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `age_range` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `msm_range` Nullable(String); CREATE TABLE IF NOT EXISTS default.event_detector_changepoints ( `probe_asn` UInt32, @@ -106,4 +111,4 @@ PARTITION BY toYYYYMM(ts) ORDER BY (probe_asn, probe_cc, ts, domain) SETTINGS index_granularity = 8192; -ALTER TABLE default.event_detector_changepoints ADD COLUMN `block_type` String; \ No newline at end of file +ALTER TABLE default.event_detector_changepoints ADD COLUMN `block_type` String; diff --git a/ooniapi/services/ooniprobe/Dockerfile b/ooniapi/services/ooniprobe/Dockerfile index 42fae6d0b..1c2305356 100644 --- a/ooniapi/services/ooniprobe/Dockerfile +++ b/ooniapi/services/ooniprobe/Dockerfile @@ -1,5 +1,5 @@ # Python builder -FROM python:3.11-bookworm as builder +FROM python:3.11-bookworm AS builder ARG BUILD_LABEL=docker WORKDIR /build @@ -14,10 +14,10 @@ RUN find /build -type f -name '._*' -delete RUN echo "$BUILD_LABEL" > /build/src/ooniprobe/BUILD_LABEL -RUN hatch build +RUN make build ### Actual image running on the host -FROM python:3.11-bookworm as runner +FROM python:3.11-bookworm AS runner WORKDIR /app diff --git a/ooniapi/services/ooniprobe/pyproject.toml b/ooniapi/services/ooniprobe/pyproject.toml index b8c6fb32b..c29f2c2e7 100644 --- a/ooniapi/services/ooniprobe/pyproject.toml +++ b/ooniapi/services/ooniprobe/pyproject.toml @@ -28,7 +28,8 @@ dependencies = [ "fastapi-utils[all] ~= 0.8.0", "zstd ~= 1.5.7.2", "boto3 ~= 1.39.3", - "boto3-stubs[s3] ~= 1.39.3" + "boto3-stubs[s3] ~= 1.39.3", + "ooniauth-py @ https://github.com/ooni/userauth/raw/refs/heads/release-wheels/ooniauth-py/wheels/ooniauth_py-0.1.0-cp310-abi3-manylinux_2_34_x86_64.whl" ] readme = "README.md" @@ -82,6 +83,13 @@ test-cov = "pytest -s --full-trace --log-level=INFO --log-cli-level=INFO -v --s cov-report = ["coverage report"] cov = ["test-cov", "cov-report"] +# Allows specifyng dependencies as a direct reference like an URL +# or a file path. +# Required by ooniauth-py +# See: https://hatch.pypa.io/1.13/config/metadata/#allowing-direct-references +[tool.hatch.metadata] +allow-direct-references = true + [tool.pytest.ini_options] addopts = ["--import-mode=importlib"] diff --git a/ooniapi/services/ooniprobe/scripts/new_server_state.py b/ooniapi/services/ooniprobe/scripts/new_server_state.py new file mode 100755 index 000000000..d82271f60 --- /dev/null +++ b/ooniapi/services/ooniprobe/scripts/new_server_state.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +""" +Use this script to generate a new server state for the anonymous credentials protocol + +Two Keys will be generated: + +- Secret key: Save it in parameter store as a secret string, do not share it +- Public parameters: A string that will be shared with probes to run the ZKP verification + +""" + +import ooniauth_py + +state = ooniauth_py.ServerState() + +secret_key = state.get_secret_key() +public_params = state.get_public_parameters() + +print( + f""" +Secret Key: + + {secret_key} + +Public Parameters: + + {public_params} +""") \ No newline at end of file diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/dependencies.py b/ooniapi/services/ooniprobe/src/ooniprobe/dependencies.py index 4cde16c68..0245e75c4 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/dependencies.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/dependencies.py @@ -1,14 +1,21 @@ -import io +from typing import Annotated, TypeAlias, Any, Dict +from datetime import datetime +import time from pathlib import Path -from typing import Annotated, TypeAlias, Dict, Any -import boto3 -import ujson -import geoip2.database -from clickhouse_driver import Client as Clickhouse from fastapi import Depends + +import geoip2.database + +import ujson +from pydantic import BaseModel, Field from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import sessionmaker, Session + +from clickhouse_driver import Client as Clickhouse +import io + +import boto3 from mypy_boto3_s3 import S3Client from .common.config import Settings @@ -21,7 +28,6 @@ def get_postgresql_session(settings: SettingsDep): engine = create_engine(settings.postgresql_url) SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - db = SessionLocal() try: yield db @@ -29,6 +35,9 @@ def get_postgresql_session(settings: SettingsDep): db.close() +PostgresSessionDep = Annotated[Session, Depends(get_postgresql_session)] + + def get_cc_reader(settings: SettingsDep): db_path = Path(settings.geoip_db_dir, "cc.mmdb") return geoip2.database.Reader(db_path) @@ -60,7 +69,6 @@ def get_s3_client() -> S3Client: s3 = boto3.client("s3") return s3 - S3ClientDep = Annotated[S3Client, Depends(get_s3_client)] __cache__ = dict() @@ -69,6 +77,74 @@ def get_cache(): return __cache__ CacheDep = Annotated[Dict[str, Any], Depends(get_cache)] +class Manifest(BaseModel): + """ + Manifest used for ZKP verification + """ + nym_scope: str = "ooni.org/{probe_cc}/{probe_asn}" + submission_policy: Dict[str, Any] = dict() + public_parameters: str + +class ManifestMeta(BaseModel): + """ + Manifest metadata + """ + version: str + last_modification_date: datetime + manifest_url: str = Field(description="URL pointing to the AWS public record of this manifest") + +class ManifestResponse(BaseModel): + manifest: Manifest + meta: ManifestMeta + +def get_manifest(s3: S3ClientDep, bucket: str, file: str) -> ManifestResponse: + # Get version & metadata + resp = s3.list_object_versions( + Bucket=bucket, + Prefix=file # Only get versions of the specified file + ) + + versions = resp.get("Versions") + assert versions, "Couldn't find versions for the specified manifest" + + latest = next((x for x in versions if x.get('IsLatest')), None) + + assert latest, "Couldn't find latest manifest version. Is versioning activated?" + assert 'VersionId' in latest, "Manifest version not provided" + assert 'LastModified' in latest, "Last modification date not provided" + + meta = ManifestMeta( + version=latest['VersionId'], + last_modification_date=latest['LastModified'], + manifest_url=f"https://{bucket}.s3.amazonaws.com/{file}", + ) + + # Get Object + manifest_resp = s3.get_object(Bucket=bucket, Key=file) + manifest_json = ujson.load(manifest_resp['Body']) + manifest = Manifest(**manifest_json) + return ManifestResponse(manifest=manifest, meta = meta) + +def get_manifest_cached(s3: S3ClientDep, bucket: str, file: str, cache: CacheDep, cache_time_seconds : float = 60) -> ManifestResponse: + """ + Fetch the manifest and cache the result for `cache_time_seconds` + + Following calls will try to fetch the result from cache + """ + key = str((bucket, file)) + val = cache.get(key) + now = time.time() + + if val is None or (now - val[1]) > cache_time_seconds: + val = __cache__[key] = (get_manifest(s3, bucket, file), now) + + return val[0] + +def _get_manifest(s3: S3ClientDep, settings : SettingsDep, cache: CacheDep) -> ManifestResponse: + return get_manifest_cached(s3, settings.anonc_manifest_bucket, settings.anonc_manifest_file, cache) + +ManifestDep = Annotated[ManifestResponse, Depends(_get_manifest)] + def read_file(s3_client : S3ClientDep, bucket: str, file : str) -> str: """ Reads the content of `file` within `bucket` into a string diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/main.py b/ooniapi/services/ooniprobe/src/ooniprobe/main.py index 0cb1a0e85..2a85831b4 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/main.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/main.py @@ -3,6 +3,7 @@ from contextlib import asynccontextmanager from urllib.request import urlopen +import boto3 from fastapi import Depends, FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -19,7 +20,7 @@ from .routers import reports, bouncer, prio_crud from .download_geoip import try_update -from .dependencies import get_postgresql_session, get_clickhouse_session, SettingsDep +from .dependencies import S3ClientDep, get_manifest, get_postgresql_session, get_clickhouse_session, SettingsDep from .common.dependencies import get_settings from .common.config import Settings from .common.version import get_build_label @@ -50,7 +51,6 @@ async def lifespan( yield - async def setup_repeating_tasks(settings: Settings): # Call all repeating tasks here to make them start # See: https://fastapi-utils.davidmontague.xyz/user-guide/repeated-tasks/ @@ -99,6 +99,7 @@ class HealthStatus(BaseModel): @app.get("/health") async def health( settings: SettingsDep, + s3: S3ClientDep, db=Depends(get_postgresql_session), clickhouse=Depends(get_clickhouse_session), ): @@ -135,6 +136,22 @@ async def health( if settings.prometheus_metrics_password == "CHANGEME": errors.append("bad_prometheus_password") + if settings.anonc_manifest_bucket == "CHANGEME": + errors.append("bad_manifest_bucket") + + if settings.anonc_manifest_file == "CHANGEME": + errors.append("bad_manifest_file") + + if settings.anonc_secret_key == "CHANGEME": + errors.append("bad_anonc_secret_key") + + # Check that you can retrieve the manifest + try: + get_manifest(s3, settings.anonc_manifest_bucket, settings.anonc_manifest_file) + except Exception as e: + errors.append("anonc_manifest_unreachable") + log.error(f"Error retrieving manifest: {e}") + status = "ok" if len(errors) > 0: status = "fail" diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/metrics.py b/ooniapi/services/ooniprobe/src/ooniprobe/metrics.py new file mode 100644 index 000000000..6789e7003 --- /dev/null +++ b/ooniapi/services/ooniprobe/src/ooniprobe/metrics.py @@ -0,0 +1,83 @@ +from prometheus_client import Counter, Info, Gauge + + +class Metrics: + # -- < Measurement submission > ------------------------------------ + MSMNT_DISCARD_ASN0 = Counter( + "receive_measurement_discard_asn_0", + "How many measurements were discarded due to probe_asn == ASN0", + ) + + MSMNT_DISCARD_CC_ZZ = Counter( + "receive_measurement_discard_cc_zz", + "How many measurements were discarded due to probe_cc == ZZ", + ) + + MSMNT_RECEIVED_CNT = Counter( + "receive_measurement_count", + "Count of incomming measurements", + ) + + PROBE_CC_ASN_MATCH = Counter( + "probe_cc_asn_match", + "How many matches between reported and observed probe_cc and asn", + ) + + PROBE_CC_ASN_NO_MATCH = Counter( + "probe_cc_asn_nomatch", + "How many mismatches between reported and observed probe_cc and asn", + labelnames=["mismatch"], + ) + + MISSED_MSMNTS = Counter( + "missed_msmnts", "Measurements that failed to be sent to the fast path." + ) + + SEND_FASTPATH_FAILURE = Counter( + "measurement_fastpath_send_failure_count", + "How many times ooniprobe failed to send a measurement to fastpath", + ) + + SEND_S3_FAILURE = Counter( + "measurement_s3_upload_failure_count", + "How many times ooniprobe failed to send a measurement to s3. " + "Measurements are sent to s3 when they can't be sent to the fastpath", + ) + + # -- < Probe services > ---------------------------------------------------- + PROBE_LOGIN = Counter( + "probe_login_requests", + "Requests made to the probe login endpoint", + labelnames=["state", "detail", "login"], + ) + + PROBE_UPDATE_INFO = Info( + "probe_update_info", + "Information reported in the probe update endpoint", + ) + + CHECK_IN_TEST_LIST_COUNT = Gauge( + "check_in_test_list_count", "Amount of test lists present in each experiment" + ) + + GEOIP_ADDR_FOUND = Counter( + "geoip_ipaddr_found", + "If the ip address was found by geoip", + labelnames=["probe_cc", "asn"], + ) + + GEOIP_ADDR_NOT_FOUND = Counter( + "geoip_ipaddr_not_found", "We couldn't look up the IP address in the database" + ) + + GEOIP_CC_DIFFERS = Counter( + "geoip_cc_differs", "There's a mismatch between reported CC and observed CC" + ) + + GEOIP_ASN_DIFFERS = Counter( + "geoip_asn_differs", "There's a mismatch between reported ASN and observed ASN" + ) + + TEST_LIST_URLS_COUNT = Gauge( + "test_list_urls_count", "Size of reported test list" + ) \ No newline at end of file diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/models.py b/ooniapi/services/ooniprobe/src/ooniprobe/models.py index ddc040d63..2bffb46d8 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/models.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/models.py @@ -1,10 +1,12 @@ from datetime import datetime - -from sqlalchemy import ForeignKey, Sequence, String -from sqlalchemy.orm import Mapped, mapped_column, relationship - from .common.models import UtcDateTime from .common.postgresql import Base +from sqlalchemy import ForeignKey, Sequence, String +from sqlalchemy.orm import Mapped +from sqlalchemy.orm import mapped_column, relationship +import logging + +log = logging.getLogger(__name__) class OONIProbeVPNProvider(Base): diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/bouncer.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/bouncer.py index 90f203703..55240d587 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/bouncer.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/bouncer.py @@ -48,7 +48,12 @@ class NetTestResponse(BaseModel): nettests: List[NetTest] = Field(alias="net-tests") -@router.post("/net-tests", tags=["bouncer"], response_model=NetTestResponse, response_model_exclude_unset=True) +@router.post( + "/net-tests", + tags=["bouncer"], + response_model=NetTestResponse, + response_model_exclude_unset=True, +) async def bouncer_net_tests( response: Response, request: Request, diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/prio_crud.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/prio_crud.py index 6a997b1bd..39a16cf21 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/prio_crud.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/prio_crud.py @@ -26,10 +26,15 @@ class PrioritizationType(BaseModel): url: Optional[str] = Field("URL or wildcard (*)") -@router.get("/_/show_countries_prioritization", tags=["prioritization"], response_model=None) -def show_countries_prioritization(clickhouse: ClickhouseDep, - format: Optional[str] = Query(default="JSON", description="Format of response, CSV or JSON") - ) -> List[PrioritizationType]: +@router.get( + "/_/show_countries_prioritization", tags=["prioritization"], response_model=None +) +def show_countries_prioritization( + clickhouse: ClickhouseDep, + format: Optional[str] = Query( + default="JSON", description="Format of response, CSV or JSON" + ), +) -> List[PrioritizationType]: sql = """ SELECT domain, url, cc, category_code, msmt_cnt, anomaly_perc FROM citizenlab @@ -54,7 +59,7 @@ def show_countries_prioritization(clickhouse: ClickhouseDep, li = sorted(li, key=lambda x: (x["cc"], -x["priority"])) - if len(li)== 0: + if len(li) == 0: raise HTTPException(status_code=400, detail="no data") if format.upper() == "CSV": @@ -71,17 +76,24 @@ class DebugPrioritization(BaseModel): prio_rules: Tuple -@router.get("/_/debug_prioritization", tags=["prioritization"], response_model=DebugPrioritization) +@router.get( + "/_/debug_prioritization", + tags=["prioritization"], + response_model=DebugPrioritization, +) def debug_prioritization( clickhouse: ClickhouseDep, probe_cc: Optional[str] = Query(description="2-letter Country-Code", default="ZZ"), - category_codes: str = Query(description="Comma separated list of uppercase URL categories"), + category_codes: str = Query( + description="Comma separated list of uppercase URL categories" + ), probe_asn: int = Query(description="Probe ASN"), - limit: Optional[int] = Query(description="Maximum number of URLs to return", default=-1), - ) -> DebugPrioritization: + limit: Optional[int] = Query( + description="Maximum number of URLs to return", default=-1 + ), +) -> DebugPrioritization: - test_items, entries, prio_rules = generate_test_list(clickhouse, - probe_cc, category_codes, probe_asn, limit, True + test_items, entries, prio_rules = generate_test_list( + clickhouse, probe_cc, category_codes, probe_asn, limit, True ) return {"test_items": test_items, "entries": entries, "prio_rules": prio_rules} - diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py index 1fb55631e..f2fdd958d 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/reports.py @@ -1,76 +1,27 @@ +from typing import List, Dict, Any import asyncio -import io import logging -import random -from datetime import datetime, timezone from hashlib import sha512 -from typing import List, Dict, Any +from datetime import datetime, timezone +import io +import random +from fastapi import Request, Response, APIRouter, Header import httpx -from fastapi import Request, Response, APIRouter, HTTPException, Header from pydantic import Field -from prometheus_client import Counter import zstd -from ..common.metrics import timer +from ..utils import generate_report_id, error, compare_probe_msmt_cc_asn +from ..metrics import Metrics +from ..dependencies import SettingsDep, ASNReaderDep, CCReaderDep, S3ClientDep from ..common.routers import BaseModel from ..common.utils import setnocacheresponse -from ..dependencies import SettingsDep, ASNReaderDep, CCReaderDep, S3ClientDep -from ..utils import ( - generate_report_id, - extract_probe_ipaddr, - lookup_probe_cc, - lookup_probe_network, -) - +from ..common.metrics import timer router = APIRouter() log = logging.getLogger(__name__) -class Metrics: - MSMNT_DISCARD_ASN0 = Counter( - "receive_measurement_discard_asn_0", - "How many measurements were discarded due to probe_asn == ASN0", - ) - - MSMNT_DISCARD_CC_ZZ = Counter( - "receive_measurement_discard_cc_zz", - "How many measurements were discarded due to probe_cc == ZZ", - ) - - MSMNT_RECEIVED_CNT = Counter( - "receive_measurement_count", - "Count of incomming measurements", - ) - - PROBE_CC_ASN_MATCH = Counter( - "probe_cc_asn_match", - "How many matches between reported and observed probe_cc and asn", - ) - - PROBE_CC_ASN_NO_MATCH = Counter( - "probe_cc_asn_nomatch", - "How many mismatches between reported and observed probe_cc and asn", - labelnames=["mismatch", "reported", "detected"], - ) - - MISSED_MSMNTS = Counter( - "missed_msmnts", "Measurements that failed to be sent to the fast path." - ) - - SEND_FASTPATH_FAILURE = Counter( - "measurement_fastpath_send_failure_count", - "How many times ooniprobe failed to send a measurement to fastpath", - ) - - SEND_S3_FAILURE = Counter( - "measurement_s3_upload_failure_count", - "How many times ooniprobe failed to send a measurement to s3. " - "Measurements are sent to s3 when they can't be sent to the fastpath", - ) - - class OpenReportRequest(BaseModel): """ Open report @@ -156,8 +107,10 @@ async def receive_measurement( empty_measurement = {} try: rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") - except Exception: - log.info("Unexpected report_id %r", report_id[:200]) + except Exception as e: + log.info( + f"Unexpected report_id {report_id[:200]}. Error: {e}", + ) raise error("Incorrect format") # TODO validate the timestamp? @@ -168,8 +121,8 @@ async def receive_measurement( try: asn_i = int(asn) - except ValueError: - log.info("ASN value not parsable %r", asn) + except ValueError as e: + log.info(f"ASN value not parsable {asn}. Error: {e}") error("Incorrect format") if asn_i == 0: @@ -185,11 +138,11 @@ async def receive_measurement( data = await request.body() if content_encoding == "zstd": try: + compressed_len = len(data) data = zstd.decompress(data) - ratio = len(data) / len(data) - log.debug(f"Zstd compression ratio {ratio}") - except Exception: - log.info("Failed zstd decompression") + log.debug(f"Zstd compression ratio {compressed_len / len(data)}") + except Exception as e: + log.info(f"Failed zstd decompression. Error: {e}") error("Incorrect format") # Write the whole body of the measurement in a directory based on a 1-hour @@ -245,40 +198,3 @@ def close_report(report_id): Close a report """ return {} - - -def error(msg: str, status_code: int = 400): - raise HTTPException(status_code=status_code, detail=msg) - - -def compare_probe_msmt_cc_asn( - cc: str, - asn: str, - request: Request, - cc_reader: CCReaderDep, - asn_reader: ASNReaderDep, -): - """Compares CC/ASN from measurement with CC/ASN from HTTPS connection ipaddr - Generates a metric. - """ - try: - cc = cc.upper() - ipaddr = extract_probe_ipaddr(request) - db_probe_cc = lookup_probe_cc(ipaddr, cc_reader) - db_asn, _ = lookup_probe_network(ipaddr, asn_reader) - if db_asn.startswith("AS"): - db_asn = db_asn[2:] - if db_probe_cc == cc and db_asn == asn: - Metrics.PROBE_CC_ASN_MATCH.inc() - elif db_probe_cc != cc: - log.error(f"db_cc != cc: {db_probe_cc} != {cc}") - Metrics.PROBE_CC_ASN_NO_MATCH.labels( - mismatch="cc", reported=cc, detected=db_probe_cc - ).inc() - elif db_asn != asn: - log.error(f"db_asn != asn: {db_asn} != {asn}") - Metrics.PROBE_CC_ASN_NO_MATCH.labels( - mismatch="asn", reported=asn, detected=db_asn - ).inc() - except Exception: - pass diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py index 48ce62dee..73ef4e251 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/routers/v1/probe_services.py @@ -1,14 +1,19 @@ import logging from datetime import datetime, timezone, timedelta import time -from typing import Annotated, List, Optional, Any, Dict, Tuple +from typing import List, Any, Dict, Tuple, Optional, Annotated import random +import ujson +from hashlib import sha512 +import asyncio +import io import geoip2 import geoip2.errors -from fastapi import APIRouter, HTTPException, Query, Response, Request -from prometheus_client import Counter, Info, Gauge +from fastapi import APIRouter, HTTPException, Response, Request, status, Header, Query from pydantic import Field, IPvAnyAddress +from ooniauth_py import ProtocolError, CredentialError, DeserializationFailed, ServerState +import httpx from ...common.auth import create_jwt, decode_jwt, jwt from ...common.routers import BaseModel @@ -25,7 +30,17 @@ extract_probe_ipaddr, lookup_probe_cc, lookup_probe_network, + error, + compare_probe_msmt_cc_asn, ) + +from ...dependencies import ( + ManifestDep, + ManifestResponse, + PostgresSessionDep, + S3ClientDep, +) +from ..reports import Metrics from ...common.utils import setcacheresponse from ...prio import FailoverTestListDep, failover_generate_test_list, generate_test_list @@ -34,45 +49,6 @@ log = logging.getLogger(__name__) -class Metrics: - PROBE_LOGIN = Counter( - "probe_login_requests", - "Requests made to the probe login endpoint", - labelnames=["state", "detail", "login"], - ) - - PROBE_UPDATE_INFO = Info( - "probe_update_info", - "Information reported in the probe update endpoint", - ) - - CHECK_IN_TEST_LIST_COUNT = Gauge( - "check_in_test_list_count", "Amount of test lists present in each experiment" - ) - - GEOIP_ADDR_FOUND = Counter( - "geoip_ipaddr_found", - "If the ip address was found by geoip", - labelnames=["probe_cc", "asn"], - ) - - GEOIP_ADDR_NOT_FOUND = Counter( - "geoip_ipaddr_not_found", "We couldn't look up the IP address in the database" - ) - - GEOIP_CC_DIFFERS = Counter( - "geoip_cc_differs", "There's a mismatch between reported CC and observed CC" - ) - - GEOIP_ASN_DIFFERS = Counter( - "geoip_asn_differs", "There's a mismatch between reported ASN and observed ASN" - ) - - TEST_LIST_URLS_COUNT = Gauge( - "test_list_urls_count", "How many urls were generated for a test list" - ) - - class ProbeLogin(BaseModel): # Allow None username and password # to deliver informational 401 error when they're missing @@ -697,12 +673,16 @@ class GeoLookupResult(BaseModel): class GeoLookupRequest(BaseModel): - addresses: List[IPvAnyAddress] = Field(description="list of IPv4 or IPv6 address to geolookup") + addresses: List[IPvAnyAddress] = Field( + description="list of IPv4 or IPv6 address to geolookup" + ) class GeoLookupResponse(BaseModel): v: int = Field(description="response format version", default=1) - geolocation: Dict[IPvAnyAddress, GeoLookupResult] = Field(description="Dict of IP addresses to GeoLookupResult") + geolocation: Dict[IPvAnyAddress, GeoLookupResult] = Field( + description="Dict of IP addresses to GeoLookupResult" + ) @router.post("/geolookup", tags=["ooniprobe"]) @@ -716,20 +696,20 @@ async def geolookup( # initial values probe_geoip compares with probe_cc = "ZZ" asn = "AS0" - geolookup_resp = {"geolocation": {}} + geolocation = dict() # for each address provided, call probe_geoip and add the data to our response for ipaddr in data.addresses: # call probe_geoip() and map the keys to the geolookup v1 API resp, _, _ = probe_geoip(ipaddr, probe_cc, asn, cc_reader, asn_reader) # it doesn't seem possible to have separate aliases for (de)serialization - if resp["probe_network_name"] == None: + if resp["probe_network_name"] is None: resp["probe_network_name"] = "" - geolookup_resp["geolocation"][ipaddr] = GeoLookupResult(cc=resp["probe_cc"], + geolocation[ipaddr] = GeoLookupResult(cc=resp["probe_cc"], asn=resp["probe_asn"], as_name=resp["probe_network_name"]) setnocacheresponse(response) - return geolookup_resp + return GeoLookupResponse(geolocation = geolocation) class CollectorEntry(BaseModel): @@ -738,10 +718,11 @@ class CollectorEntry(BaseModel): front: Optional[str] = Field(default=None, description="Fronted domain") type: Optional[str] = Field(default=None, description="Type of collector") + @router.get("/collectors", tags=["ooniprobe"]) def list_collectors( settings: SettingsDep, - ) -> List[CollectorEntry]: +) -> List[CollectorEntry]: config_collectors = settings.collectors collectors_response = [] for entry in config_collectors: @@ -750,6 +731,304 @@ def list_collectors( return collectors_response +# -- ------------------------------------ + +@router.get("/manifest", tags=["anonymous_credentials"]) +def manifest(manifest: ManifestDep, response: Response) -> ManifestResponse: + # Cache for 1 minute + setcacheresponse('1m', response) + return manifest + + +class RegisterRequest(BaseModel): + manifest_version: str + credential_sign_request: str + + +class RegisterResponse(BaseModel): + credential_sign_response: str + emission_day: int + + +# TODO: choose a better name for this endpoint +@router.post("/sign_credential", tags=["anonymous_credentials"]) +def sign_credential(register_request: RegisterRequest, manifest: ManifestDep, settings: SettingsDep): + + if register_request.manifest_version != manifest.meta.version: + _raise_manifest_not_found(register_request.manifest_version) + + protocol_state = ServerState.from_creds( + manifest.manifest.public_parameters, + settings.anonc_secret_key + ) + + try: + resp = protocol_state.handle_registration_request( + register_request.credential_sign_request + ) + except (ProtocolError, CredentialError, DeserializationFailed) as e: + raise to_http_exception(e) + + return RegisterResponse( + credential_sign_response=resp, emission_day=protocol_state.today() + ) + + +def to_http_exception(error: ProtocolError | CredentialError | DeserializationFailed): + + type_to_str = { + ProtocolError: "protocol_error", + DeserializationFailed: "deserialization_failed", + CredentialError: "credential_error", + } + type_str = type_to_str[type(error)] + + assert isinstance(error, (ProtocolError, CredentialError, DeserializationFailed)) + status_code = ( + status.HTTP_400_BAD_REQUEST + if isinstance(error, DeserializationFailed) + else status.HTTP_403_FORBIDDEN + ) + + return HTTPException( + status_code=status_code, detail={"error": type_str, "message": str(error)} + ) + + +class SubmitMeasurementRequest(BaseModel): + format: str + content: Dict[str, Any] + # -- < Anonymous Credentials > ---------------------- + # not post quantum, in the future we might want to use a hashed key for storage + nym: str + zkp_request: str + probe_age_range: Tuple[int, int] = Field( + description="A range representing an interval containing the probe actual age. " + "This is used for the anonymous credentials protocol to identify the probe without using " + "personally identifiable information.\n" + "The server will use the age range to validate in zero proof that the request came from a " + "trusted probe. " + "Example: if probe age is 30 days, a valid answer is (25, 35)" + "See: https://github.com/ooni/userauth/blob/db333a4cbee30bf289aacba857fbcb28cc9d7505/ooniauth-core/src/submit.rs#L142" + ) + probe_msm_range: Tuple[int, int] = Field( + description="A range representing an interval containing the how many measurements the probe has sent. " + "This is used for the anonymous credentials protocol to identify the probe without using " + "personally identifiable information.\n" + "The server will use the measurement count range to validate in zero proof that " + "the request came from a trusted probe. " + "Example: if the probe has sent 100 measurements, a valid answer is (90, 110)" + "See: https://github.com/ooni/userauth/blob/db333a4cbee30bf289aacba857fbcb28cc9d7505/ooniauth-core/src/submit.rs#L142" + ) + manifest_version: str + + +class SubmitMeasurementResponse(BaseModel): + """ + Acknowledge + """ + + measurement_uid: str | None = Field( + examples=["20210208220710.181572_MA_ndt_7888edc7748936bf"], default=None + ) + is_verified: bool = Field(description="if the ZKP was able to verify this request") + submit_response: str | None = Field( + description="Anonymous credential verification response. Null if verification failed" + ) + + +@router.post("/submit_measurement/{report_id}", tags=["anonymous_credentials"]) +async def submit_measurement( + report_id: str, + request: Request, + submit_request: SubmitMeasurementRequest, + response: Response, + cc_reader: CCReaderDep, + asn_reader: ASNReaderDep, + settings: SettingsDep, + s3_client: S3ClientDep, + manifest: ManifestDep, + content_encoding: str = Header(default=None), +) -> SubmitMeasurementResponse | Dict[str, Any]: + """ + Submit measurement, using the anonymous credentials protocol to establish a confidence + layer over the incoming measurements. + + The anonmymous credentials protocol allows us to measure the trustworthiness of a probe without + revealing personally identifiable information. + + An error will be returned if using a deprecated manifest version + """ + setnocacheresponse(response) + empty_measurement = {} + try: + rid_timestamp, test_name, cc, asn, format_cid, rand = report_id.split("_") + except Exception: + log.info("Unexpected report_id %r", report_id[:200]) + raise error("Incorrect format") + + # TODO validate the timestamp? + good = len(cc) == 2 and test_name.isalnum() and 1 < len(test_name) < 30 + if not good: + log.info("Unexpected report_id %r", report_id[:200]) + error("Incorrect format") + + try: + asn_i = int(asn) + except ValueError: + log.info("ASN value not parsable %r", asn) + error("Incorrect format") + + if asn_i == 0: + log.info("Discarding ASN == 0") + Metrics.MSMNT_DISCARD_ASN0.inc() + return empty_measurement + + if cc.upper() == "ZZ": + log.info("Discarding CC == ZZ") + Metrics.MSMNT_DISCARD_CC_ZZ.inc() + return empty_measurement + + # Run verification + assert "probe_cc" in submit_request.content and isinstance( + submit_request.content["probe_cc"], str + ) + assert "probe_asn" in submit_request.content and isinstance( + submit_request.content["probe_asn"], str + ) + protocol_state = ServerState.from_creds( + manifest.manifest.public_parameters, + settings.anonc_secret_key + ) + + if submit_request.manifest_version == manifest.meta.version: + try: + submit_response = protocol_state.handle_submit_request( + submit_request.nym, + submit_request.zkp_request, + submit_request.content["probe_cc"], + submit_request.content["probe_asn"], + list(submit_request.probe_age_range), + list(submit_request.probe_msm_range), + ) + is_verified = True + except (DeserializationFailed, ProtocolError, CredentialError) as e: + # proof failed + # TODO Q: should we add a "why not verified" field to the measurement? + log.error(f"ZKP Failed: {e}") + is_verified = False + submit_response = None + else: + log.error(f"Unable to run ZKP verification: invalid manifest version '{submit_request.manifest_version}'") + _raise_manifest_not_found(submit_request.manifest_version) + + data = submit_request.model_dump() + + # Add verification-related data. + data["is_verified"] = is_verified + data_buff = io.BytesIO() + stream = io.TextIOWrapper(data_buff, "utf-8") + ujson.dump(data, stream) + data_bin = data_buff.getvalue() + + # Write the whole body of the measurement in a directory based on a 1-hour + # time window + now = datetime.now(timezone.utc) + h = sha512(data_bin).hexdigest()[:16] + ts = now.strftime("%Y%m%d%H%M%S.%f") + + # msmt_uid is a unique id based on upload time, cc, testname and hash + msmt_uid = f"{ts}_{cc}_{test_name}_{h}" + Metrics.MSMNT_RECEIVED_CNT.inc() + + compare_probe_msmt_cc_asn(cc, asn, request, cc_reader, asn_reader) + # Use exponential back off with jitter between retries to avoid choking the fastpath server + # with many retries at the same time when there's a temporary issue + N_RETRIES = 3 + for t in range(N_RETRIES): + try: + url = f"{settings.fastpath_url}/{msmt_uid}" + + async with httpx.AsyncClient() as client: + resp = await client.post(url, content=data_bin, timeout=59) + + assert resp.status_code == 200, resp.content + return SubmitMeasurementResponse( + measurement_uid=msmt_uid, + is_verified=is_verified, + submit_response=submit_response, + ) + + except Exception as exc: + log.error( + f"[Try {t+1}/{N_RETRIES}] Error trying to send measurement to the fastpath ({settings.fastpath_url}). Error: {exc}" + ) + sleep_time = random.uniform(0, min(3, 0.3 * 2**t)) + await asyncio.sleep(sleep_time) + + Metrics.SEND_FASTPATH_FAILURE.inc() + + # wasn't possible to send msmnt to fastpath, try to send it to s3 + try: + s3_client.upload_fileobj( + data_buff, Bucket=settings.failed_reports_bucket, Key=report_id + ) + except Exception as exc: + log.error(f"Unable to upload measurement to s3. Error: {exc}") + Metrics.SEND_S3_FAILURE.inc() + + log.error(f"Unable to send report to fastpath. report_id: {report_id}") + Metrics.MISSED_MSMNTS.inc() + return empty_measurement + + +class CredentialUpdateRequest(BaseModel): + old_manifest_version: str = Field( + description="The original manifest version you are trying to update from" + ) + manifest_version: str = Field( + description="The up-to-date version of the manifest you used to generate the update request" + ) + update_request: str = Field( + description="The ZKP request generated by the anonymous credentials library. " + "Note that you need to generate this with the new server credentials, " + "so you might want to update your manifest version to the most recent one" + ) + + +class CredentialUpdateResponse(BaseModel): + update_response: str = Field( + description="The ZKP response generated by the anonymous credentials library" + ) + + +# TODO implement credential update +# @router.post( +# "/update_credential", +# response_model=CredentialUpdateResponse, +# tags=["anonymous_credentials"], +# ) +async def credential_update( + update_request: CredentialUpdateRequest, session: PostgresSessionDep +) -> CredentialUpdateResponse: + """ + Update your credentials from an older version to a new version. + This might be necessary when the manifest is updated and our keys are rotated. + Before creating your update credentials request you need to update your manifest version + by calling `/api/v1/manifest` + """ + + # TODO we need to find a way to keep track of older private keys to support this endpoint + raise NotImplementedError("Credential Update is not yet implemented") + +def _raise_manifest_not_found(version: str): + raise HTTPException( + detail={ + "error": "manifest_not_found", + "message": f"No manifest with version '{version}' was found", + }, + status_code=status.HTTP_404_NOT_FOUND, + ) class TorTarget(BaseModel): address: str fingerprint: str diff --git a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py index 33bc11352..c30097b86 100644 --- a/ooniapi/services/ooniprobe/src/ooniprobe/utils.py +++ b/ooniapi/services/ooniprobe/src/ooniprobe/utils.py @@ -6,10 +6,12 @@ import itertools import logging +from typing import Dict, List, TypedDict, Tuple, Any + +from fastapi import Request, HTTPException from typing import List, TypedDict, Tuple import io -from fastapi import Request from mypy_boto3_s3 import S3Client from sqlalchemy.orm import Session import pem @@ -19,6 +21,7 @@ import httpx +from .metrics import Metrics from .common.config import Settings from .dependencies import CCReaderDep, ASNReaderDep from ooniprobe.models import OONIProbeVPNProvider, OONIProbeVPNProviderEndpoint @@ -149,6 +152,38 @@ def lookup_probe_network(ipaddr: str, asn_reader: ASNReaderDep) -> Tuple[str, st resp.autonomous_system_organization or "0", ) + +def error(msg: str | Dict[str, Any], status_code: int = 400): + raise HTTPException(status_code=status_code, detail=msg) + + +def compare_probe_msmt_cc_asn( + cc: str, + asn: str, + request: Request, + cc_reader: CCReaderDep, + asn_reader: ASNReaderDep, +): + """Compares CC/ASN from measurement with CC/ASN from HTTPS connection ipaddr + Generates a metric. + """ + try: + cc = cc.upper() + ipaddr = extract_probe_ipaddr(request) + db_probe_cc = lookup_probe_cc(ipaddr, cc_reader) + db_asn, _ = lookup_probe_network(ipaddr, asn_reader) + if db_asn.startswith("AS"): + db_asn = db_asn[2:] + if db_probe_cc == cc and db_asn == asn: + Metrics.PROBE_CC_ASN_MATCH.inc() + elif db_probe_cc != cc: + Metrics.PROBE_CC_ASN_NO_MATCH.labels(mismatch="cc").inc() + elif db_asn != asn: + Metrics.PROBE_CC_ASN_NO_MATCH.labels(mismatch="asn").inc() + except Exception: + pass + + def get_first_ip(headers: str) -> str: """ parse the first ip from a comma-separated list of ips encoded as a string diff --git a/ooniapi/services/ooniprobe/tests/conftest.py b/ooniapi/services/ooniprobe/tests/conftest.py index 476c9ba15..b60b13e69 100644 --- a/ooniapi/services/ooniprobe/tests/conftest.py +++ b/ooniapi/services/ooniprobe/tests/conftest.py @@ -5,6 +5,7 @@ import os import json import time +from datetime import datetime from typing import Dict, Any from urllib.request import urlopen @@ -19,6 +20,11 @@ from ooniprobe.common.config import Settings from ooniprobe.common.clickhouse_utils import insert_click from ooniprobe.common.dependencies import get_settings +from ooniprobe.dependencies import get_s3_client, _get_manifest, ManifestResponse, ManifestMeta, Manifest +from ooniprobe.main import app +from ooniprobe.download_geoip import try_update +from ooniprobe.common.clickhouse_utils import insert_click +from .utils import setup_user from ooniprobe.dependencies import get_s3_client, get_tor_targets_from_s3 from ooniprobe.main import app from ooniprobe.download_geoip import try_update @@ -104,20 +110,51 @@ def geoip_db_dir(fixture_path: Path): ooni_tempdir = fixture_path / "geoip" return str(ooni_tempdir) +def make_manifest_mock_fn(public_params : str): + def get_manifest_mock(): + + return ManifestResponse( + manifest=Manifest( + submission_policy={"*/*" : "*"}, + public_parameters=public_params + ), + meta = ManifestMeta( + version="1", + last_modification_date=datetime.now(), + manifest_url="https://ooni.mock/manifest" + ) + ) + + return get_manifest_mock @pytest.fixture -def client(clickhouse_server: str, test_settings: Any, geoip_db_dir: str): +def client(clickhouse_server, test_settings, geoip_db_dir, test_creds): + _, public_key = test_creds app.dependency_overrides[get_settings] = test_settings app.dependency_overrides[get_s3_client] = get_s3_client_mock app.dependency_overrides[get_tor_targets_from_s3] = get_tor_targets_from_s3_mock + app.dependency_overrides[_get_manifest] = make_manifest_mock_fn(public_key) # lifespan won't run so do this here to have the DB try_update(geoip_db_dir) client = TestClient(app) yield client +@pytest.fixture +def test_creds(): + """ + Example credentials used for anonymous credentials + """ + + # (Secret key, public key) + return ( + "ASAAAAAAAAAAXgJT5699LDE/QjmzDjsHcVP+EOxPO/aS4grULhSZqAsgAAAAAAAAAEf1WUPkxSb1cCAUAPvwqqtsOSiLd0m/BpY5HAZLvGQFAwAAAAAAAAAgAAAAAAAAABjrB0p6whCfu/5mDCtrZ/DSaPy+dC3LFL08taNMZ10KIAAAAAAAAAAC8BjxPSqTTnYT1IrWSFkHWvE3e/dstCrLo6GvN6+FAyAAAAAAAAAAyxD+iRjtKEHwRj1AwpDt0Sj4WI8pSDfoxB29G/8eYQ0=", + "ASAAAAAAAAAA0Dfe5U+8tRO3siBVVp+zEoC309fhfhtsVJIv2zpeD1cBIAAAAAAAAAAw/LnzUbQepSaQzI29yCH31/Q2Awq9NuTfgW4BQzorGwMAAAAAAAAAIAAAAAAAAABgspiZ6jNoM11fBO/JJ82Ry+QJ6S2mpOpCOmu2KsxGfiAAAAAAAAAACltCp9TukC2mNw0YYAAjqhXH2fsOYoz5FwcjE1bZoD0gAAAAAAAAAN4hyN9hpFgmOU37ynNgoIBLnSg+dObJ/yWRwt5/uYhh" + ) + @pytest.fixture -def test_settings(alembic_migration: Any, geoip_db_dir: str, clickhouse_server: str, fastpath_server: str): +def test_settings(alembic_migration: Any, geoip_db_dir: str, clickhouse_server: str, fastpath_server: str, test_creds): + (secret_key, _) = test_creds yield make_override_get_settings( postgresql_url=alembic_migration, jwt_encryption_key=JWT_ENCRYPTION_KEY, @@ -126,6 +163,9 @@ def test_settings(alembic_migration: Any, geoip_db_dir: str, clickhouse_server: geoip_db_dir=geoip_db_dir, collector_id="1", fastpath_url=fastpath_server, + anonc_manifest_bucket="test-bucket", + anonc_manifest_file = "manifest.json", + anonc_secret_key = secret_key, tor_targets="./tests/fixtures/data/tor-targets.json" ) @@ -211,3 +251,7 @@ def load_url_priorities(clickhouse_db: ClickhouseClient): query = "INSERT INTO url_priorities (sign, category_code, cc, domain, url, priority) VALUES" insert_click(clickhouse_db, query, j) + +@pytest.fixture(scope="function") +def client_with_original_manifest(client): + return setup_user(client) diff --git a/ooniapi/services/ooniprobe/tests/fakepath/main.py b/ooniapi/services/ooniprobe/tests/fakepath/main.py index 3dd68c9d5..8d7505c5b 100644 --- a/ooniapi/services/ooniprobe/tests/fakepath/main.py +++ b/ooniapi/services/ooniprobe/tests/fakepath/main.py @@ -12,3 +12,7 @@ @app.get("/") def health(): return + +@app.post("/{msmt_uid}") +def receive_msm(): + return {} diff --git a/ooniapi/services/ooniprobe/tests/fixtures/initdb/01-scheme.sql b/ooniapi/services/ooniprobe/tests/fixtures/initdb/01-scheme.sql index ed7b26694..2d1d83718 100644 --- a/ooniapi/services/ooniprobe/tests/fixtures/initdb/01-scheme.sql +++ b/ooniapi/services/ooniprobe/tests/fixtures/initdb/01-scheme.sql @@ -250,3 +250,9 @@ CREATE TABLE IF NOT EXISTS default.oonirun ENGINE = ReplacingMergeTree(translation_creation_time) ORDER BY (ooni_run_link_id, descriptor_creation_time) SETTINGS index_granularity = 1; + +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `is_verified` Int8; +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `nym` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `zkp_request` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `age_range` Nullable(String); +ALTER TABLE default.fastpath ADD COLUMN IF NOT EXISTS `msm_range` Nullable(String); diff --git a/ooniapi/services/ooniprobe/tests/integ/test_checkin.py b/ooniapi/services/ooniprobe/tests/integ/test_checkin.py index 7e8401fd2..4172840c5 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_checkin.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_checkin.py @@ -1,39 +1,17 @@ -def getjson(client, url): - response = client.get(url) - assert response.status_code == 200 - return response.json() - - -def getjsonh(client, url, headers=None): - response = client.get(url, headers=headers) - assert response.status_code == 200 - return response.json() - - -def post(client, url, data): - response = client.post(url, data=data) - assert response.status_code == 200 - return response.json() - - -def postj(client, url, **kw): - response = client.post(url, json=kw) - assert response.status_code == 200 - return response.json() - +from ..utils import getj, postj ## Tests - - def test_check_in_geoip(client): j = dict( on_wifi=True, charging=False, ) - headers = [ - ("X-Forwarded-For", "192.33.4.12") # The IP address of c.root-servers.net - ] - c = client.post("/api/v1/check-in", json=j, headers=headers).json() + headers = { + "X-Forwarded-For":"192.33.4.12" # The IP address of c.root-servers.net + } + + c = postj(client, "/api/v1/check-in", json=j, headers=headers) + assert c["probe_cc"] == "US" assert c["probe_asn"] == "AS2149" assert c["probe_network_name"] is not None @@ -46,7 +24,7 @@ def test_check_in_basic(client, load_url_priorities): on_wifi=True, charging=False, ) - c = postj(client, "/api/v1/check-in", **j) + c = postj(client, "/api/v1/check-in", j) assert c["v"] == 1 urls = c["tests"]["web_connectivity"]["urls"] @@ -67,7 +45,7 @@ def test_check_in_url_category_news(client): charging=True, web_connectivity=dict(category_codes=["NEWS"]), ) - c = postj(client, "/api/v1/check-in", **j) + c = postj(client, "/api/v1/check-in", j) assert c["v"] == 1 urls = c["tests"]["web_connectivity"]["urls"] assert len(urls), urls @@ -82,5 +60,5 @@ def test_check_in_url_category_news(client): def test_test_helpers(client): - c = getjson(client, "/api/v1/test-helpers") + c = getj(client, "/api/v1/test-helpers") assert len(c) == 6 diff --git a/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py b/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py index 697730986..34d97efa6 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_geolookup.py @@ -1,6 +1,5 @@ -from typing import Dict, Tuple +from typing import Tuple import ooniprobe.routers.v1.probe_services as ps -from ooniprobe.utils import lookup_probe_cc, lookup_probe_network from ooniprobe.dependencies import CCReaderDep, ASNReaderDep def fake_lookup_probe_network(ipaddr: str, asn_reader: ASNReaderDep) -> Tuple[str, str]: diff --git a/ooniapi/services/ooniprobe/tests/integ/test_reports.py b/ooniapi/services/ooniprobe/tests/integ/test_reports.py index 7ee2d1420..5a34a8984 100644 --- a/ooniapi/services/ooniprobe/tests/integ/test_reports.py +++ b/ooniapi/services/ooniprobe/tests/integ/test_reports.py @@ -64,11 +64,11 @@ def test_collector_upload_msmt_valid(client): assert len(rid) == 61, rid msmt = dict(test_keys={}) - c = postj(client, f"/report/{rid}", {"format": "json", "content": msmt}) - assert c == {} + c = postj(client, f"/report/{rid}", {"format":"json", "content":msmt}) # unsure about this merge + assert c['measurement_uid'].endswith("_IE_webconnectivity_e7889aeba0b36729"), c c = postj(client, f"/report/{rid}/close", json={}) - assert c == {} + assert c == {}, c def test_collector_upload_msmt_valid_zstd(client): @@ -77,4 +77,5 @@ def test_collector_upload_msmt_valid_zstd(client): zmsmt = zstd.compress(msmt) headers = [("Content-Encoding", "zstd")] c = post(client, f"/report/{rid}", zmsmt, headers=headers) - assert c == {} + assert len(c) == 1 + assert c['measurement_uid'].endswith("_IT_integtest_50be3cd5406bca65"), c diff --git a/ooniapi/services/ooniprobe/tests/test_anoncred.py b/ooniapi/services/ooniprobe/tests/test_anoncred.py new file mode 100644 index 000000000..e08e7f442 --- /dev/null +++ b/ooniapi/services/ooniprobe/tests/test_anoncred.py @@ -0,0 +1,174 @@ +from typing import Any, Dict +import pytest +from fastapi import status +from ooniauth_py import UserState, ServerState +from .utils import getj, postj, setup_user + +def test_manifest_basic(client, db): + # Should not crash + getj(client, "/api/v1/manifest") + + +def test_registration_basic(client): + + manifest = getj(client, "/api/v1/manifest") + + user_state = UserState(manifest['manifest']['public_parameters']) + sign_req = user_state.make_registration_request() + resp = postj( + client, + "/api/v1/sign_credential", + { + "credential_sign_request" : sign_req, + "manifest_version" : manifest['meta']['version'] + } + ) + # should be able to verify this credential + user_state.handle_registration_response(resp['credential_sign_response']) # should not crash + +def test_registration_errors(client): + + bad_version = "999" + resp = client.post("/api/v1/sign_credential", + json={ + "credential_sign_request" : "doesntmatter", + "manifest_version" : bad_version + } + ) + # Bad manifest date should raise 404 + assert resp.status_code == 404, resp.content + j = resp.json() + assert 'error' in j['detail'] and 'message' in j['detail'], j + assert j['detail']['error'] == "manifest_not_found" + + # Not using the right public params should not verify + manifest = getj(client, "/api/v1/manifest") + bad_server = ServerState() + user = UserState(bad_server.get_public_parameters()) + resp = client.post("/api/v1/sign_credential", json={ + "credential_sign_request" : user.make_registration_request(), + "manifest_version" : manifest['meta']['version'] + }) + + assert resp.status_code == status.HTTP_403_FORBIDDEN, resp.content + j = resp.json() + assert j['detail']['error'] == 'protocol_error' + + # Changing random characters should mess with the serialization + user = UserState(manifest['manifest']['public_parameters']) + sign_req = user.make_registration_request() + bad = "bad" + assert len(sign_req) >= len(bad), sign_req + sign_req = bad + sign_req[len(bad):] + resp = client.post("/api/v1/sign_credential", json={ + "credential_sign_request" : sign_req, + "manifest_version" : manifest['meta']['version'] + }) + + assert resp.status_code == status.HTTP_400_BAD_REQUEST, resp.content + j = resp.json() + assert j['detail']['error'] == 'deserialization_failed', j + +def test_submission_basic(client): + # open report + j = make_report_request() + resp = postj(client, "/report", json=j) + rid = resp.pop("report_id") + + # Create user + user, manifest_version, emission_day = setup_user(client) + + submit_request = user.make_submit_request("IE", "AS34245", emission_day) + + msm = make_measurement(submit_request.nym, submit_request.request, emission_day, manifest_version) + + c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + assert c['is_verified'] is True + + assert c['submit_response'], "Submit response should not be null if the proof was verified" + user.handle_submit_response(c['submit_response']) + +# TODO implement credential update +@pytest.mark.skip +def test_credential_update(client, client_with_original_manifest, second_manifest): + + (user, manifest, _) = client_with_original_manifest + new_manifest = getj(client, "/api/v1/manifest") + user.set_public_params(new_manifest["manifest"]["public_parameters"]) + result = postj(client, "/api/v1/update_credential", json=dict( + old_manifest_version = manifest, + manifest_version = new_manifest['meta']['version'], + update_request = user.make_credential_update_request() + )) + assert 'update_response' in result + user.handle_credential_update_response(result['update_response']) # should not crash + +# TODO implement credential update +@pytest.mark.skip +def test_credential_update_with_submission(client, client_with_original_manifest, second_manifest): + (user, manifest_version, emission_day) = client_with_original_manifest + + # first submit: should just work out of the box + j = make_report_request() + resp = postj(client, "/report", json=j) + rid = resp.pop("report_id") + + submit_request = user.make_submit_request("IE", "AS34245", emission_day) + + msm = make_measurement(submit_request.nym, submit_request.request, emission_day, manifest_version) + + c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + + assert c['is_verified'] is True + + # second submit: should work after updating creds + new_manifest = getj(client, "/api/v1/manifest") + user.set_public_params(new_manifest['public_parameters']) + result = postj(client, "/api/v1/update_credential", json=dict( + old_manifest_version = manifest_version, + manifest_version=new_manifest['meta']['version'], + update_request = user.make_credential_update_request() + )) + + assert 'update_response' in result + user.handle_credential_update_response(result['update_response']) # should not crash + + j = make_report_request() + resp = postj(client, "/report", json=j) + rid = resp.pop("report_id") + + submit_request = user.make_submit_request("IE", "AS34245", emission_day) + + msm = make_measurement(submit_request.nym, submit_request.request, emission_day, manifest_version) + + c = postj(client, f"/api/v1/submit_measurement/{rid}", msm) + + +def make_measurement(nym : str, zkp_request: str, emission_day: int, manifest_version: str, probe_cc: str = "IE", probe_asn: str = "AS34245") -> Dict[str, Any]: + return { + "format": "json", + "content": { + "test_name": "web_connectivity", + "probe_asn": probe_asn, + "probe_cc": probe_cc, + "test_start_time": "2020-09-09 14:11:11", + }, + "nym": nym, + "zkp_request": zkp_request, + "probe_age_range": [emission_day - 30, emission_day + 1], + "probe_msm_range": [0, 100], + "manifest_version": manifest_version + } + +def make_report_request(probe_cc: str = "IE", probe_asn: str = "AS34245") -> Dict[str, Any]: + return { + "data_format_version": "0.2.0", + "format": "json", + "probe_asn": probe_asn, + "probe_cc": probe_cc, + "software_name": "miniooni", + "software_version": "0.17.0-beta", + "test_name": "web_connectivity", + "test_start_time": "2020-09-09 14:11:11", + "test_version": "0.1.0", + } \ No newline at end of file diff --git a/ooniapi/services/ooniprobe/tests/test_main.py b/ooniapi/services/ooniprobe/tests/test_main.py index d5dc9994c..473b7e1d0 100644 --- a/ooniapi/services/ooniprobe/tests/test_main.py +++ b/ooniapi/services/ooniprobe/tests/test_main.py @@ -1,11 +1,27 @@ import pytest +from datetime import datetime import httpx from fastapi.testclient import TestClient from ooniprobe.main import lifespan, app - - -def test_health_good(client): +from ooniprobe.dependencies import Manifest, ManifestResponse, ManifestMeta +import ooniprobe.main as m + +def fake_get_manifest(s3, bucket, key): + return ManifestResponse( + manifest=Manifest( + submission_policy={"*/*" : "*"}, + public_parameters="public parameters" + ), + meta = ManifestMeta( + version="1", + last_modification_date=datetime.now(), + manifest_url="https://ooni.mock/manifest" + ) + ) + +def test_health_good(client, monkeypatch): + monkeypatch.setattr(m, "get_manifest", fake_get_manifest) r = client.get("health") j = r.json() assert j["status"] == "ok", j diff --git a/ooniapi/services/ooniprobe/tests/utils.py b/ooniapi/services/ooniprobe/tests/utils.py index 01c638fa2..0188d9dd4 100644 --- a/ooniapi/services/ooniprobe/tests/utils.py +++ b/ooniapi/services/ooniprobe/tests/utils.py @@ -1,6 +1,7 @@ from httpx import Client -from typing import Dict, Any +from typing import Dict, Any, Tuple from fastapi import status +from ooniauth_py import UserState def getj(client : Client, url: str, params: Dict[str, Any] = {}) -> Dict[str, Any]: resp = client.get(url) @@ -20,4 +21,16 @@ def postj( def post(client : Client, url, data=None, headers=None): response = client.post(url, data=data, headers=headers) assert response.status_code == 200 - return response.json() \ No newline at end of file + return response.json() + +def setup_user(client) -> Tuple[UserState, str, int]: # user, manifest version, emission day + manifest = getj(client, "/api/v1/manifest") + user = UserState(manifest['manifest']['public_parameters']) + req = user.make_registration_request() + resp = postj(client, "/api/v1/sign_credential", json = { + "credential_sign_request" : req, + "manifest_version" : manifest['meta']['version'] + }) + user.handle_registration_response(resp['credential_sign_response']) + + return (user, manifest['meta']['version'], resp['emission_day'])