diff --git a/.github/ISSUE_TEMPLATE/data-quality-issue.md b/.github/ISSUE_TEMPLATE/data-quality-issue.md deleted file mode 100644 index 989dc03e..00000000 --- a/.github/ISSUE_TEMPLATE/data-quality-issue.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: Data quality issue -about: Information about data quality issues -title: '' -labels: data quality, triage -assignees: hellais - ---- - -**Describe the bug** - -A clear and concise description of what the data quality issue is. - -**To Reproduce** - -Steps to reproduce the behavior. - -**Expected behavior** - -A clear and concise description of what you expected to happen. - -**Screenshots** - -If applicable, add screenshots to help explain your problem. - -**System information (if applicable):** - -- Device: [e.g., iPhone6] - - OS: [e.g., iOS8.1] - - OONI Probe version: [e.g., 3.11.0] - -**Additional context** - -Add any other context about the problem here. diff --git a/oonidata/src/oonidata/models/dataformats.py b/oonidata/src/oonidata/models/dataformats.py index a6020d92..e53e8d19 100644 --- a/oonidata/src/oonidata/models/dataformats.py +++ b/oonidata/src/oonidata/models/dataformats.py @@ -285,6 +285,7 @@ class DNSQuery(BaseModel): dial_id: Optional[int] = None + @add_slots @dataclass class TCPConnectStatus(BaseModel): @@ -368,3 +369,42 @@ class NetworkEvent(BaseModel): # Deprecated fields dial_id: Optional[int] = None conn_id: Optional[int] = None + + +@add_slots +@dataclass +class OpenVPNHandshake(BaseModel): + handshake_time: float + endpoint: str + ip: str # we might want to make this optional, and scrub in favor of ASN/prefix + port: int + transport: str + provider: str + t0: float + t: float + openvpn_options: Optional[Dict[str, str]] = None + tags: Optional[List[str]] = None + transaction_id: Optional[str] = None + failure: Failure = None + +@add_slots +@dataclass +class OpenVPNPacket(BaseModel): + operation: str + opcode: str + id: int + payload_size: int + acks: Optional[List[int]] = None + send_attempts: Optional[int] = None + + +@add_slots +@dataclass +class OpenVPNNetworkEvent(BaseModel): + operation: str + stage: str + t: float + tags: Optional[List[str]] = None + packet: Optional[OpenVPNPacket] = None + transaction_id: Optional[int] = None + diff --git a/oonidata/src/oonidata/models/nettests/__init__.py b/oonidata/src/oonidata/models/nettests/__init__.py index 247fd39d..3e098bad 100644 --- a/oonidata/src/oonidata/models/nettests/__init__.py +++ b/oonidata/src/oonidata/models/nettests/__init__.py @@ -14,6 +14,7 @@ from .http_invalid_request_line import HTTPInvalidRequestLine from .http_header_field_manipulation import HTTPHeaderFieldManipulation from .echcheck import ECHCheck +from .openvpn import OpenVPN SUPPORTED_CLASSES = [ HTTPHeaderFieldManipulation, @@ -28,6 +29,7 @@ Signal, FacebookMessenger, Whatsapp, + OpenVPN, BaseMeasurement, ECHCheck, ] @@ -44,6 +46,7 @@ Signal, FacebookMessenger, Whatsapp, + OpenVPN, BaseMeasurement, ] diff --git a/oonidata/src/oonidata/models/nettests/openvpn.py b/oonidata/src/oonidata/models/nettests/openvpn.py new file mode 100644 index 00000000..a67254c9 --- /dev/null +++ b/oonidata/src/oonidata/models/nettests/openvpn.py @@ -0,0 +1,36 @@ +from dataclasses import dataclass +from typing import List, Optional + +from ..base import BaseModel + +from oonidata.compat import add_slots +from oonidata.models.dataformats import ( + BaseTestKeys, + Failure, + TCPConnect, + OpenVPNHandshake, + OpenVPNNetworkEvent, +) +from oonidata.models.nettests.base_measurement import BaseMeasurement + + +@add_slots +@dataclass +class OpenVPNTestKeys(BaseTestKeys): + success: Optional[bool] = False + failure: Failure = None + + network_events: Optional[List[OpenVPNNetworkEvent]] = None + tcp_connect: Optional[List[TCPConnect]] = None + openvpn_handshake: Optional[List[OpenVPNHandshake]] = None + + bootstrap_time: Optional[float] = None + tunnel: str = None + + +@add_slots +@dataclass +class OpenVPN(BaseMeasurement): + __test_name__ = "openvpn" + + test_keys: OpenVPNTestKeys diff --git a/oonidata/src/oonidata/models/observations.py b/oonidata/src/oonidata/models/observations.py index 89cc0fe1..fbfacb8a 100644 --- a/oonidata/src/oonidata/models/observations.py +++ b/oonidata/src/oonidata/models/observations.py @@ -2,6 +2,7 @@ from dataclasses import dataclass, field from datetime import datetime from typing import ( + Dict, Optional, List, Tuple, @@ -388,3 +389,43 @@ class HTTPMiddleboxObservation: hfm_diff: Optional[str] = None hfm_failure: Optional[str] = None hfm_success: Optional[bool] = None + + +@table_model( + table_name="obs_tunnel", + table_index=("measurement_uid", "observation_idx", "measurement_start_time"), +) +@dataclass +class TunnelObservation: + measurement_meta: MeasurementMeta + probe_meta: ProbeMeta + + observation_idx: int + + ip: str + port: int + transport: str + + # label can be a fqdn or a human readable lable used to group the endpoint + label: str + + # definition of success will need to change when/if we're able to gather metrics + # through the tunnel. + success: bool + failure: Failure + + protocol: str + + # indicates obfuscation or modifications from the main protocol family. + variant: str = "" + + # any metadata about the providers behind the endpoints. + provider: str = "" + + # time it took to perform the bootstrap of the protocol + bootstrap_time: float = -1 + + # timing list + t0: float = -1 + timing_map: Dict[str, float] = field(default_factory=dict) + failure_map: Dict[str, str] = field(default_factory=dict) diff --git a/oonipipeline/Design.md b/oonipipeline/Design.md index 72a3187a..cae79700 100644 --- a/oonipipeline/Design.md +++ b/oonipipeline/Design.md @@ -12,11 +12,11 @@ needed. ### Expose a queriable low level view on measurements -Currently it's only possible to query measurement at a granuliaty which is as -fine a measurement. +Currently it's only possible to query measurement at a granularity which is as +fine as a measurement. This means that it's only possible to answer questions which the original -designer of the experiment had already throught of. +designer of the experiment had already thought of. On the other hand the new pipeline breaks down measurements into distinct observations (think 1 DNS query and answer or 1 TLS handshake towards a diff --git a/oonipipeline/Readme.md b/oonipipeline/Readme.md index 67ed4790..951c03c2 100644 --- a/oonipipeline/Readme.md +++ b/oonipipeline/Readme.md @@ -8,7 +8,7 @@ For historical context, these are the major revisions: - `v1` - OONI Pipeline based on custom CLI scripts using mongodb as a backend. Used until ~2015. - `v2` - OONI Pipeline based on [luigi](https://luigi.readthedocs.io/en/stable/). Used until ~2017. - `v3` - OONI Pipeline based on [airflow](https://airflow.apache.org/). Used until ~2020. -- `v4` - OONI Pipeline basedon custom script and systemd units (aka fastpath). Currently in use in production. +- `v4` - OONI Pipeline based on custom script and systemd units (aka fastpath). Currently in use in production. - `v5` - Next generation OONI Pipeline. What this readme is relevant to. Expected to become in production by Q4 2024. ## Setup diff --git a/oonipipeline/src/oonipipeline/db/create_tables.py b/oonipipeline/src/oonipipeline/db/create_tables.py index f657196e..3e3d6c1a 100644 --- a/oonipipeline/src/oonipipeline/db/create_tables.py +++ b/oonipipeline/src/oonipipeline/db/create_tables.py @@ -23,6 +23,7 @@ WebControlObservation, WebObservation, HTTPMiddleboxObservation, + TunnelObservation, ) from .connections import ClickhouseConnection @@ -89,6 +90,9 @@ def typing_to_clickhouse(t: Any) -> str: if t in (Mapping[str, str], Dict[str, str]): return "Map(String, String)" + if t in (Mapping[str, float], Dict[str, float]): + return "Map(String, Float64)" + # TODO(art): eventually all the above types should be mapped using a similar pattern child_type, parent_type = typing.get_args(t) is_nullable = False @@ -165,6 +169,7 @@ def format_create_query( table_models = [ WebObservation, WebControlObservation, + TunnelObservation, HTTPMiddleboxObservation, ] diff --git a/oonipipeline/src/oonipipeline/transforms/measurement_transformer.py b/oonipipeline/src/oonipipeline/transforms/measurement_transformer.py index ec1b013a..e7bfdcc7 100644 --- a/oonipipeline/src/oonipipeline/transforms/measurement_transformer.py +++ b/oonipipeline/src/oonipipeline/transforms/measurement_transformer.py @@ -24,6 +24,8 @@ NetworkEvent, TCPConnect, TLSHandshake, + OpenVPNHandshake, + OpenVPNNetworkEvent, maybe_binary_data_to_bytes, ) from oonidata.models.nettests.base_measurement import BaseMeasurement @@ -35,6 +37,7 @@ TCPObservation, TLSObservation, WebObservation, + TunnelObservation, ) from oonidata.datautils import ( InvalidCertificateChain, @@ -731,7 +734,6 @@ def make_measurement_meta(msmt: BaseMeasurement, bucket_date: str) -> Measuremen measurement_start_time=measurement_start_time, ) - class MeasurementTransformer: """ MeasurementTransformer is responsible for taking a measurement and @@ -883,7 +885,7 @@ def consume_web_observations( It will attempt to map them via the transaction_id or ip:port tuple. - Any observation that cannot be mapped will be returned inside of it's + Any observation that cannot be mapped will be returned inside of its own WebObservation with all other columns set to None. """ web_obs_list: List[WebObservation] = [] diff --git a/oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py b/oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py new file mode 100644 index 00000000..c45a7c7d --- /dev/null +++ b/oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py @@ -0,0 +1,107 @@ +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from oonidata.models.dataformats import OpenVPNHandshake, OpenVPNNetworkEvent +from oonidata.models.nettests import OpenVPN +from oonidata.models.observations import ( + TunnelObservation, + WebObservation, +) + +from ..measurement_transformer import MeasurementTransformer, normalize_failure + + +def count_key_exchange_packets(network_events: List[OpenVPNNetworkEvent]) -> int: + """ + return number of packets exchanged in the SENT_KEY state + """ + n = 0 + for evt in network_events: + if evt.stage == "SENT_KEY" and evt.operation.startswith("packet_"): + n += 1 + return n + + +def make_openvpn_timing_map( + network_events: List[OpenVPNNetworkEvent], +) -> Dict[str, float]: + + timings = {} + # TODO(ain): condition to test version >= xyz + if len(network_events) != 0: + for evt in network_events: + if evt.packet is not None: + if evt.packet.opcode == "P_CONTROL_HARD_RESET_CLIENT_V2": + timings["openvpn_handshake_hr_client"] = evt.t + elif evt.packet.opcode == "P_CONTROL_HARD_RESET_SERVER_V2": + timings["openvpn_handshake_hr_server"] = evt.t + elif evt.tags and "client_hello" in evt.tags: + timings["openvpn_handshake_clt_hello"] = evt.t + elif evt.tags and "server_hello" in evt.tags: + timings["openvpn_handshake_srv_hello"] = evt.t + if evt.operation == "state" and evt.stage == "GOT_KEY": + timings["openvpn_handshake_got_keys"] = evt.t + if evt.operation == "state" and evt.stage == "GENERATED_KEYS": + timings["openvpn_handshake_gen_keys"] = evt.t + + timings["openvpn_handshake_key_exchg_n"] = count_key_exchange_packets( + network_events + ) + + return timings + + +class OpenVPNTransformer(MeasurementTransformer): + + def make_observations( + self, msmt: OpenVPN + ) -> Tuple[List[TunnelObservation], List[WebObservation]]: + if not msmt.test_keys: + return ([], []) + + # def make_openvpn_observations( + # self, + # tcp_observations: Optional[List[TCPConnect]], + # openvpn_handshakes: List[OpenVPNHandshake], + # network_events: Optional[List[OpenVPNNetworkEvent]], + # bootstrap_time: float, + # ) -> List[TunnelObservation]: + # """ + # Returns a list of OpenVPNObservations by mapping all related + # TCPObservations, OpenVPNNetworkevents and OpenVPNHandshakes. + # """ + + tunnel_observations: List[TunnelObservation] = [] + + assert msmt.test_keys is not None + assert msmt.test_keys.openvpn_handshake is not None + idx = 1 + for hs in msmt.test_keys.openvpn_handshake: + to = TunnelObservation( + measurement_meta=self.measurement_meta, + probe_meta=self.probe_meta, + failure=normalize_failure(hs.failure), + success=hs.failure == None, + label="", + protocol="openvpn", + transport=hs.transport, + ip=hs.ip, + observation_idx=idx, + port=hs.port, + bootstrap_time=msmt.test_keys.bootstrap_time or -1, + ) + + to.timing_map = make_openvpn_timing_map(msmt.test_keys.network_events or []) + to.timing_map["handshake_t"] = hs.t + to.timing_map["handshake_t0"] = hs.t0 + to.failure_map["handshake"] = hs.failure or "" + idx += 1 + + tunnel_observations.append(to) + + web_observations = self.consume_web_observations( + dns_observations=[], + tcp_observations=self.make_tcp_observations(msmt.test_keys.tcp_connect), + tls_observations=[], + http_observations=[], + ) + return (tunnel_observations, web_observations) diff --git a/oonipipeline/src/oonipipeline/transforms/observations.py b/oonipipeline/src/oonipipeline/transforms/observations.py index 4645bdd6..e3d8cf05 100644 --- a/oonipipeline/src/oonipipeline/transforms/observations.py +++ b/oonipipeline/src/oonipipeline/transforms/observations.py @@ -3,6 +3,7 @@ from oonidata.models.observations import ( HTTPMiddleboxObservation, + TunnelObservation, WebControlObservation, WebObservation, ) @@ -19,6 +20,7 @@ UrlGetter, WebConnectivity, HTTPInvalidRequestLine, + OpenVPN, ) from .nettests.dnscheck import DNSCheckTransformer @@ -34,6 +36,7 @@ from .nettests.browser_web import BrowserWebTransformer from .nettests.urlgetter import UrlGetterTransformer from .nettests.web_connectivity import WebConnectivityTransformer +from .nettests.openvpn import OpenVPNTransformer from .nettests.http_invalid_request_line import ( HTTPInvalidRequestLineTransformer, ) @@ -54,12 +57,14 @@ "http_header_field_manipulation": HTTPHeaderFieldManipulationTransformer, "http_invalid_request_line": HTTPInvalidRequestLineTransformer, "web_connectivity": WebConnectivityTransformer, + "openvpn": OpenVPNTransformer, "echcheck": ECHCheckTransformer, } TypeWebConnectivityObservations = Tuple[ List[WebObservation], List[WebControlObservation] ] +TypeTunnelObservations = Tuple[List[TunnelObservation], List[WebObservation]] TypeWebObservations = Tuple[List[WebObservation]] TypeHTTPMiddleboxObservations = Tuple[List[HTTPMiddleboxObservation]] @@ -90,6 +95,14 @@ def measurement_to_observations( ) -> TypeWebObservations: ... +@overload +def measurement_to_observations( + msmt: OpenVPN, + netinfodb: NetinfoDB, + bucket_date: str = "1984-01-01", +) -> TypeTunnelObservations: ... + + @overload def measurement_to_observations( msmt: SupportedDataformats, @@ -109,6 +122,7 @@ def measurement_to_observations( TypeWebObservations, TypeWebConnectivityObservations, TypeHTTPMiddleboxObservations, + TypeTunnelObservations, Tuple[()], ]: if msmt.test_name in NETTEST_TRANSFORMERS: diff --git a/oonipipeline/tests/_fixtures.py b/oonipipeline/tests/_fixtures.py index f8a81020..3426b307 100644 --- a/oonipipeline/tests/_fixtures.py +++ b/oonipipeline/tests/_fixtures.py @@ -38,6 +38,8 @@ "20240302000050.000654_SN_webconnectivity_fe4221088fbdcb0a", # nxdomain down "20240302000305.316064_EG_webconnectivity_397bca9091b07444", # nxdomain blocked, unknown_failure and from the future "20240309112858.009725_SE_webconnectivity_dce757ef4ec9b6c8", # blockpage for Iran in Sweden + "20240923234302.648951_FI_openvpn_714dd28ff412c1a5", # openvpn from Finland, tcp + "20240923234302.024724_FI_openvpn_515e6b6d9c0d832e", # openvpn from Finland, udp "20241101171509.547086_CN_webconnectivity_f0ec3f0e369cec9b", # web_connectivity 0.5 which was failing "20250120145930.582606_US_echcheck_899a304b7beef05c", # echcheck tls_handshakes "20240714111032.898994_GB_echcheck_f10079cac5cdf770" # echcheck control and target diff --git a/oonipipeline/tests/test_transforms.py b/oonipipeline/tests/test_transforms.py index 34d589ec..441734e0 100644 --- a/oonipipeline/tests/test_transforms.py +++ b/oonipipeline/tests/test_transforms.py @@ -11,7 +11,8 @@ from oonidata.models.nettests.stun_reachability import StunReachability from oonidata.models.nettests.urlgetter import UrlGetter from oonidata.models.nettests.browser_web import BrowserWeb -from oonidata.models.observations import WebObservation +from oonidata.models.nettests.openvpn import OpenVPN +from oonidata.models.observations import WebObservation, TunnelObservation from oonipipeline.transforms.measurement_transformer import ( MeasurementTransformer, @@ -410,6 +411,53 @@ def test_facebook_messenger_obs(netinfodb, measurements): assert hostname_set == spec_hostname_set assert len(web_obs) == 14 +def test_openvpn_obs(netinfodb, measurements): + bucket_date = "2024-09-23" + + msmt_udp = load_measurement( + msmt_path=measurements[ + "20240923234302.024724_FI_openvpn_515e6b6d9c0d832e" + ] + ) + assert isinstance(msmt_udp, OpenVPN) + obs_tup_udp = measurement_to_observations( + msmt=msmt_udp, netinfodb=netinfodb, bucket_date=bucket_date + ) + assert len(obs_tup_udp) == 2 + oou: TunnelObservation = obs_tup_udp[0][0] + + assert oou.success is True + assert oou.transport == "udp" + assert oou.port == 1194 + assert oou.ip == "37.218.243.98" + # assert oou.tcp_success is None + assert oou.timing_map["openvpn_handshake_srv_hello"] == 0.175448177 + assert oou.timing_map["openvpn_handshake_got_keys"] == 0.305975312 + assert oou.timing_map["openvpn_handshake_gen_keys"] == 0.376011823 + assert oou.bootstrap_time == 0.376279583 + + msmt_tcp = load_measurement( + msmt_path=measurements[ + "20240923234302.648951_FI_openvpn_714dd28ff412c1a5" + ] + ) + assert isinstance(msmt_tcp, OpenVPN) + obs_tup_tcp = measurement_to_observations( + msmt=msmt_tcp, netinfodb=netinfodb, bucket_date=bucket_date + ) + assert len(obs_tup_tcp) == 2 + oot: TunnelObservation = obs_tup_tcp[0][0] + + assert oot.success is True + assert oot.transport == "tcp" + assert oot.port == 1194 + assert oot.ip == "37.218.243.98" + # assert oot.tcp_success is True + # assert oot.tcp_t == 0.053010729 + assert oot.timing_map["openvpn_handshake_hr_client"] == 0.05684776 + assert oot.timing_map["openvpn_handshake_srv_hello"] == 0.204483958 + assert oot.timing_map["openvpn_handshake_gen_keys"] == 0.571443906 + assert oot.bootstrap_time == 0.571501093 def test_echcheck_obs_tls_handshakes(netinfodb, measurements): msmt = load_measurement(