From b8d7a86ba7008af509bd05cdfbd8c83388578009 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 16 Dec 2025 20:43:42 +0000 Subject: [PATCH] [spr] initial version Created using spr 1.3.6-beta.1 --- Cargo.lock | 17 +- Cargo.toml | 3 + cockroach-admin/Cargo.toml | 1 + cockroach-admin/api/Cargo.toml | 5 +- cockroach-admin/api/src/lib.rs | 45 +- cockroach-admin/src/cockroach_cli.rs | 6 +- cockroach-admin/src/http_entrypoints.rs | 6 +- cockroach-admin/types/Cargo.toml | 10 +- cockroach-admin/types/src/lib.rs | 683 +----------------- cockroach-admin/types/src/node.rs | 100 +++ cockroach-admin/types/versions/Cargo.toml | 21 + .../types/versions/src/impls/mod.rs | 7 + .../types/versions/src/impls/node.rs | 546 ++++++++++++++ .../types/versions/src/initial/mod.rs | 7 + .../types/versions/src/initial/node.rs | 98 +++ cockroach-admin/types/versions/src/latest.rs | 18 + cockroach-admin/types/versions/src/lib.rs | 35 + cockroach-metrics/src/lib.rs | 22 +- nexus/db-model/src/inventory.rs | 2 +- .../db-queries/src/db/datastore/inventory.rs | 2 +- nexus/inventory/src/builder.rs | 6 +- nexus/inventory/src/examples.rs | 2 +- .../tests/integration_tests/planner.rs | 2 +- nexus/types/src/inventory.rs | 2 +- 24 files changed, 882 insertions(+), 764 deletions(-) create mode 100644 cockroach-admin/types/src/node.rs create mode 100644 cockroach-admin/types/versions/Cargo.toml create mode 100644 cockroach-admin/types/versions/src/impls/mod.rs create mode 100644 cockroach-admin/types/versions/src/impls/node.rs create mode 100644 cockroach-admin/types/versions/src/initial/mod.rs create mode 100644 cockroach-admin/types/versions/src/initial/node.rs create mode 100644 cockroach-admin/types/versions/src/latest.rs create mode 100644 cockroach-admin/types/versions/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 937f69721d2..32f40672edb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1668,16 +1668,13 @@ checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" name = 
"cockroach-admin-api" version = "0.1.0" dependencies = [ - "cockroach-admin-types", + "cockroach-admin-types-versions", "dropshot", "dropshot-api-manager-types", "http", "omicron-common", - "omicron-uuid-kinds", "omicron-workspace-hack", - "schemars 0.8.22", "semver 1.0.27", - "serde", ] [[package]] @@ -1697,10 +1694,19 @@ dependencies = [ [[package]] name = "cockroach-admin-types" version = "0.1.0" +dependencies = [ + "cockroach-admin-types-versions", + "omicron-workspace-hack", + "serde", +] + +[[package]] +name = "cockroach-admin-types-versions" +version = "0.1.0" dependencies = [ "chrono", "csv", - "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", "proptest", "schemars 0.8.22", @@ -7938,6 +7944,7 @@ dependencies = [ "clap", "cockroach-admin-api", "cockroach-admin-types", + "cockroach-admin-types-versions", "csv", "dropshot", "expectorate", diff --git a/Cargo.toml b/Cargo.toml index beb7cdbfd91..47a597ca313 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "cockroach-admin", "cockroach-admin/api", "cockroach-admin/types", + "cockroach-admin/types/versions", "cockroach-metrics", "common", "dev-tools/cert-dev", @@ -187,6 +188,7 @@ default-members = [ "cockroach-admin", "cockroach-admin/api", "cockroach-admin/types", + "cockroach-admin/types/versions", "cockroach-metrics", "common", "dev-tools/cert-dev", @@ -410,6 +412,7 @@ clickward = { git = "https://github.com/oxidecomputer/clickward", rev = "e3d9a1c cockroach-admin-api = { path = "cockroach-admin/api" } cockroach-admin-client = { path = "clients/cockroach-admin-client" } cockroach-admin-types = { path = "cockroach-admin/types" } +cockroach-admin-types-versions = { path = "cockroach-admin/types/versions" } colored = "2.1" const_format = "0.2.34" cookie = "0.18" diff --git a/cockroach-admin/Cargo.toml b/cockroach-admin/Cargo.toml index 614198974ae..d3a7fca7713 100644 --- a/cockroach-admin/Cargo.toml +++ b/cockroach-admin/Cargo.toml @@ -15,6 +15,7 @@ chrono.workspace = true 
clap.workspace = true cockroach-admin-api.workspace = true cockroach-admin-types.workspace = true +cockroach-admin-types-versions.workspace = true csv.workspace = true dropshot.workspace = true http.workspace = true diff --git a/cockroach-admin/api/Cargo.toml b/cockroach-admin/api/Cargo.toml index 8ca2515e678..f4ed059238a 100644 --- a/cockroach-admin/api/Cargo.toml +++ b/cockroach-admin/api/Cargo.toml @@ -8,13 +8,10 @@ license = "MPL-2.0" workspace = true [dependencies] -cockroach-admin-types.workspace = true +cockroach-admin-types-versions.workspace = true dropshot.workspace = true dropshot-api-manager-types.workspace = true http.workspace = true omicron-common.workspace = true -omicron-uuid-kinds.workspace = true omicron-workspace-hack.workspace = true -schemars.workspace = true semver.workspace = true -serde.workspace = true diff --git a/cockroach-admin/api/src/lib.rs b/cockroach-admin/api/src/lib.rs index 2d399a894ad..9d4780c056f 100644 --- a/cockroach-admin/api/src/lib.rs +++ b/cockroach-admin/api/src/lib.rs @@ -2,15 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use cockroach_admin_types::{NodeDecommission, NodeStatus}; +use cockroach_admin_types_versions::latest; use dropshot::{ HttpError, HttpResponseOk, HttpResponseUpdatedNoContent, RequestContext, TypedBody, }; use dropshot_api_manager_types::api_versions; -use omicron_uuid_kinds::OmicronZoneUuid; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; api_versions!([ // WHEN CHANGING THE API (part 1 of 2): @@ -70,7 +67,7 @@ pub trait CockroachAdminApi { }] async fn node_status( rqctx: RequestContext, - ) -> Result, HttpError>; + ) -> Result, HttpError>; /// Get the CockroachDB node ID of the local cockroach instance. 
#[endpoint { @@ -79,7 +76,7 @@ pub trait CockroachAdminApi { }] async fn local_node_id( rqctx: RequestContext, - ) -> Result, HttpError>; + ) -> Result, HttpError>; /// Decommission a node from the CRDB cluster. #[endpoint { @@ -88,8 +85,8 @@ pub trait CockroachAdminApi { }] async fn node_decommission( rqctx: RequestContext, - body: TypedBody, - ) -> Result, HttpError>; + body: TypedBody, + ) -> Result, HttpError>; /// Proxy to CockroachDB's /_status/vars endpoint // @@ -114,35 +111,3 @@ pub trait CockroachAdminApi { rqctx: RequestContext, ) -> Result, HttpError>; } - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct ClusterNodeStatus { - pub all_nodes: Vec, -} - -/// CockroachDB Node ID -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct LocalNodeId { - /// The ID of this Omicron zone. - /// - /// This is included to ensure correctness even if a socket address on a - /// sled is reused for a different zone; if our caller is trying to - /// determine the node ID for a particular Omicron CockroachDB zone, they'll - /// contact us by socket address. We include our zone ID in the response for - /// their confirmation that we are the zone they intended to contact. - pub zone_id: OmicronZoneUuid, - // CockroachDB node IDs are integers, in practice, but our use of them is as - // input and output to the `cockroach` CLI. We use a string which is a bit - // more natural (no need to parse CLI output or stringify an ID to send it - // as input) and leaves open the door for the format to change in the - // future. 
- pub node_id: String, -} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeId { - pub node_id: String, -} diff --git a/cockroach-admin/src/cockroach_cli.rs b/cockroach-admin/src/cockroach_cli.rs index fb987a1407c..754041bc18c 100644 --- a/cockroach-admin/src/cockroach_cli.rs +++ b/cockroach-admin/src/cockroach_cli.rs @@ -3,9 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use camino::Utf8PathBuf; -use cockroach_admin_types::NodeDecommission; -use cockroach_admin_types::NodeStatus; -use cockroach_admin_types::ParseError; +use cockroach_admin_types::node::{NodeDecommission, NodeStatus, ParseError}; use dropshot::HttpError; use illumos_utils::ExecutionError; use illumos_utils::output_to_exec_error; @@ -362,7 +360,7 @@ mod tests { use super::*; use camino_tempfile::Utf8TempDir; use chrono::Utc; - use cockroach_admin_types::NodeMembership; + use cockroach_admin_types::node::NodeMembership; use nexus_test_utils::db::TestDatabase; use omicron_test_utils::dev; use omicron_test_utils::dev::poll; diff --git a/cockroach-admin/src/http_entrypoints.rs b/cockroach-admin/src/http_entrypoints.rs index db00962b230..27a686c96dd 100644 --- a/cockroach-admin/src/http_entrypoints.rs +++ b/cockroach-admin/src/http_entrypoints.rs @@ -3,8 +3,10 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use crate::context::ServerContext; -use cockroach_admin_api::*; -use cockroach_admin_types::NodeDecommission; +use cockroach_admin_api::{CockroachAdminApi, cockroach_admin_api_mod}; +use cockroach_admin_types::node::{ + ClusterNodeStatus, LocalNodeId, NodeDecommission, NodeId, +}; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::HttpResponseUpdatedNoContent; diff --git a/cockroach-admin/types/Cargo.toml b/cockroach-admin/types/Cargo.toml index 0f4953e4469..5808699aff7 100644 --- a/cockroach-admin/types/Cargo.toml +++ b/cockroach-admin/types/Cargo.toml @@ -8,14 +8,6 @@ license = "MPL-2.0" workspace = true [dependencies] -chrono.workspace = true -csv.workspace = true -omicron-common.workspace = true +cockroach-admin-types-versions.workspace = true omicron-workspace-hack.workspace = true -schemars.workspace = true serde.workspace = true -thiserror.workspace = true - -[dev-dependencies] -proptest.workspace = true -test-strategy.workspace = true diff --git a/cockroach-admin/types/src/lib.rs b/cockroach-admin/types/src/lib.rs index bef9ffc407c..663fbf42482 100644 --- a/cockroach-admin/types/src/lib.rs +++ b/cockroach-admin/types/src/lib.rs @@ -2,685 +2,4 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use chrono::{DateTime, NaiveDateTime, Utc}; -use csv::StringRecord; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize, de}; -use std::{io, net::SocketAddr}; - -#[derive(Debug, thiserror::Error)] -pub enum ParseError { - #[error("failed to parse `cockroach node status` output")] - NodeStatus(#[from] NodeStatusError), - #[error("failed to parse `cockroach decommission` output")] - Decommission(#[from] DecommissionError), -} - -#[derive(Debug, thiserror::Error)] -pub enum NodeStatusError { - #[error("missing `membership` header (found: {0:?})")] - MissingMembershipHeader(StringRecord), - #[error("failed to parse header row")] - ParseHeaderRow(#[source] csv::Error), - #[error("failed to parse record row")] - ParseRecordRow(#[source] csv::Error), - #[error("fewer fields than expected in status row: {0:?}")] - StatusRowMissingFields(StringRecord), - #[error("failed to parse node status row {row:?}")] - ParseStatusRow { - row: StringRecord, - #[source] - err: csv::Error, - }, -} - -#[derive(Debug, thiserror::Error)] -pub enum DecommissionError { - #[error("missing output row after headers")] - MissingOutputRow, - #[error("failed to parse row")] - ParseRow(#[from] csv::Error), -} - -/// CockroachDB Node ID -/// -/// This field is stored internally as a String to avoid questions -/// about size, signedness, etc - it can be treated as an arbitrary -/// unique identifier. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] -pub struct NodeId(pub String); - -impl NodeId { - pub fn new(id: String) -> Self { - Self(id) - } - - pub fn as_str(&self) -> &str { - &self.0 - } -} - -impl std::fmt::Display for NodeId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.0) - } -} - -impl std::str::FromStr for NodeId { - type Err = std::convert::Infallible; - - fn from_str(s: &str) -> Result { - Ok(Self(s.to_string())) - } -} - -// When parsing the underlying NodeId, we force it to be interpreted -// as a String. Without this custom Deserialize implementation, we -// encounter parsing errors when querying endpoints which return the -// NodeId as an integer. -impl<'de> serde::Deserialize<'de> for NodeId { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - use serde::de::{Error, Visitor}; - use std::fmt; - - struct NodeIdVisitor; - - impl<'de> Visitor<'de> for NodeIdVisitor { - type Value = NodeId; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter - .write_str("a string or integer representing a node ID") - } - - fn visit_str(self, value: &str) -> Result - where - E: Error, - { - Ok(NodeId(value.to_string())) - } - - fn visit_string(self, value: String) -> Result - where - E: Error, - { - Ok(NodeId(value)) - } - - fn visit_i64(self, value: i64) -> Result - where - E: Error, - { - Ok(NodeId(value.to_string())) - } - - fn visit_u64(self, value: u64) -> Result - where - E: Error, - { - Ok(NodeId(value.to_string())) - } - } - - deserializer.deserialize_any(NodeIdVisitor) - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeStatus { - // TODO use NodeId - pub node_id: String, - pub address: SocketAddr, - pub sql_address: SocketAddr, - pub build: String, - pub started_at: DateTime, - pub updated_at: DateTime, - pub locality: String, 
- pub is_available: bool, - pub is_live: bool, - pub replicas_leaders: i64, - pub replicas_leaseholders: i64, - pub ranges: i64, - pub ranges_unavailable: i64, - pub ranges_underreplicated: i64, - pub live_bytes: i64, - pub key_bytes: i64, - pub value_bytes: i64, - pub intent_bytes: i64, - pub system_bytes: i64, - pub gossiped_replicas: i64, - pub is_decommissioning: bool, - pub membership: String, - pub is_draining: bool, -} - -impl NodeStatus { - pub fn parse_from_csv(data: &[u8]) -> Result, ParseError> { - let mut statuses = Vec::new(); - let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); - - // We can't naively deserialize every record as a `CliNodeStatus` - // directly, because the `node status --all` flag to get all details - // also causes cockroach to emit statuses for decommissioned nodes, - // which report `NULL` for most fields. For now, we want to skip - // decommissioned nodes entirely, so we'll parse each record - // individually after checking first for whether it's decommissioned. 
- let headers = - reader.headers().map_err(NodeStatusError::ParseHeaderRow)?.clone(); - let Some(membership_idx) = - headers.iter().position(|h| h == "membership") - else { - return Err( - NodeStatusError::MissingMembershipHeader(headers).into() - ); - }; - - for row in reader.into_records() { - let row = row.map_err(NodeStatusError::ParseRecordRow)?; - - // Skip decommissioned nodes without attempting to parse them - // further, as noted above - let Some(membership) = row.get(membership_idx) else { - return Err(NodeStatusError::StatusRowMissingFields(row).into()); - }; - if membership == "decommissioned" { - continue; - } - - let record: CliNodeStatus = - row.deserialize(Some(&headers)).map_err(|err| { - NodeStatusError::ParseStatusRow { row: row.clone(), err } - })?; - statuses.push(record.into()); - } - - Ok(statuses) - } -} - -// Slightly different `NodeStatus` that matches what we get from `cockroach`: -// timestamps are a fixed format with no timezone (but are actually UTC), so we -// have a custom deserializer, and the ID column is `id` instead of `node_id`. 
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -struct CliNodeStatus { - id: String, - address: SocketAddr, - sql_address: SocketAddr, - build: String, - #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] - started_at: DateTime, - #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] - updated_at: DateTime, - locality: String, - is_available: bool, - is_live: bool, - replicas_leaders: i64, - replicas_leaseholders: i64, - ranges: i64, - ranges_unavailable: i64, - ranges_underreplicated: i64, - live_bytes: i64, - key_bytes: i64, - value_bytes: i64, - intent_bytes: i64, - system_bytes: i64, - gossiped_replicas: i64, - is_decommissioning: bool, - membership: String, - is_draining: bool, -} - -impl From for NodeStatus { - fn from(cli: CliNodeStatus) -> Self { - Self { - node_id: cli.id, - address: cli.address, - sql_address: cli.sql_address, - build: cli.build, - started_at: cli.started_at, - updated_at: cli.updated_at, - locality: cli.locality, - is_available: cli.is_available, - is_live: cli.is_live, - replicas_leaders: cli.replicas_leaders, - replicas_leaseholders: cli.replicas_leaseholders, - ranges: cli.ranges, - ranges_unavailable: cli.ranges_unavailable, - ranges_underreplicated: cli.ranges_underreplicated, - live_bytes: cli.live_bytes, - key_bytes: cli.key_bytes, - value_bytes: cli.value_bytes, - intent_bytes: cli.intent_bytes, - system_bytes: cli.system_bytes, - gossiped_replicas: cli.gossiped_replicas, - is_decommissioning: cli.is_decommissioning, - membership: cli.membership, - is_draining: cli.is_draining, - } - } -} - -fn parse_cockroach_cli_timestamp<'de, D>( - d: D, -) -> Result, D::Error> -where - D: serde::Deserializer<'de>, -{ - struct CockroachTimestampVisitor; - impl de::Visitor<'_> for CockroachTimestampVisitor { - type Value = DateTime; - - fn expecting( - &self, - formatter: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - formatter.write_str("a Cockroach CLI timestamp") - } - - fn visit_str(self, v: &str) -> Result - 
where - E: de::Error, - { - let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f") - .map_err(E::custom)?; - Ok(DateTime::from_naive_utc_and_offset(dt, Utc)) - } - } - - d.deserialize_str(CockroachTimestampVisitor) -} - -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct NodeDecommission { - pub node_id: String, - pub is_live: bool, - pub replicas: i64, - pub is_decommissioning: bool, - pub membership: NodeMembership, - pub is_draining: bool, - pub notes: Vec, -} - -impl NodeDecommission { - pub fn parse_from_csv(data: &[u8]) -> Result { - // Reading the node decommission output is awkward because it isn't - // fully CSV. We expect a CSV header, then a row for each node being - // decommissioned, then (maybe) a blank line followed by a note that is - // just a string, not related to the initial CSV data. Even though the - // CLI supports decommissioning more than one node in one invocation, we - // only provide an API to decommission a single node, so we expect: - // - // 1. The CSV header line - // 2. The one row of CSV data - // 3. Trailing notes - // - // We'll collect the notes as a separate field and return them to our - // caller. - - // First we'll run the data through a csv::Reader; this will pull out - // the header row and the one row of data. - let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); - let record: CliNodeDecommission = reader - .deserialize() - .next() - .ok_or_else(|| DecommissionError::MissingOutputRow)? - .map_err(DecommissionError::ParseRow)?; - - // Get the position where the reader ended after that one row; we'll - // collect any remaining nonempty lines as `notes`. 
- let extra_data = &data[reader.position().byte() as usize..]; - let mut notes = Vec::new(); - for line in String::from_utf8_lossy(extra_data).lines() { - let line = line.trim(); - if !line.is_empty() { - notes.push(line.to_string()); - } - } - - Ok(Self::from((record, notes))) - } -} - -// Slightly different `NodeDecommission` that matches what we get from -// `cockroach`: this omites `notes`, which isn't really a CSV field at all, but -// is instead where we collect the non-CSV string output from the CLI, uses -// a custom deserializer for `membership` to handle unknown variants, and the ID -// column is `id` instead of `node_id`. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize)] -struct CliNodeDecommission { - pub id: String, - pub is_live: bool, - pub replicas: i64, - pub is_decommissioning: bool, - #[serde(deserialize_with = "parse_node_membership")] - pub membership: NodeMembership, - pub is_draining: bool, -} - -impl From<(CliNodeDecommission, Vec)> for NodeDecommission { - fn from((cli, notes): (CliNodeDecommission, Vec)) -> Self { - Self { - node_id: cli.id, - is_live: cli.is_live, - replicas: cli.replicas, - is_decommissioning: cli.is_decommissioning, - membership: cli.membership, - is_draining: cli.is_draining, - notes, - } - } -} - -fn parse_node_membership<'de, D>(d: D) -> Result -where - D: serde::Deserializer<'de>, -{ - struct CockroachNodeMembershipVisitor; - - impl de::Visitor<'_> for CockroachNodeMembershipVisitor { - type Value = NodeMembership; - - fn expecting( - &self, - formatter: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - formatter.write_str("a Cockroach node membership string") - } - - fn visit_str(self, v: &str) -> Result - where - E: de::Error, - { - let membership = match v { - "active" => NodeMembership::Active, - "decommissioning" => NodeMembership::Decommissioning, - "decommissioned" => NodeMembership::Decommissioned, - _ => NodeMembership::Unknown { value: v.to_string() }, - }; - Ok(membership) - } - } - - 
d.deserialize_str(CockroachNodeMembershipVisitor) -} - -// The cockroach CLI and `crdb_internal.gossip_liveness` table use a string for -// node membership, but there are only three meaningful values per -// https://github.com/cockroachdb/cockroach/blob/0c92c710d2baadfdc5475be8d2238cf26cb152ca/pkg/kv/kvserver/liveness/livenesspb/liveness.go#L96, -// so we'll convert into a Rust enum and leave the "unknown" case for future -// changes that expand or reword these values. -#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] -#[serde(tag = "state", rename_all = "lowercase")] -pub enum NodeMembership { - Active, - Decommissioning, - Decommissioned, - Unknown { value: String }, -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::NaiveDate; - use test_strategy::proptest; - - #[test] - fn test_node_status_parse_single_line_from_csv() { - let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live,replicas_leaders,replicas_leaseholders,ranges,ranges_unavailable,ranges_underreplicated,live_bytes,key_bytes,value_bytes,intent_bytes,system_bytes,gossiped_replicas,is_decommissioning,membership,is_draining -5,[fd00:1122:3344:103::3]:32221,[fd00:1122:3344:103::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:30.527658,2025-06-02 14:00:36.749872,,true,true,38,38,210,0,0,3958791538,846009128,5249950302,0,108083397,210,false,active,false"#; - let expected = NodeStatus { - node_id: "5".to_string(), - address: "[fd00:1122:3344:103::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:103::3]:32221".parse().unwrap(), - build: "v22.1.22-29-g865aff1595".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 5, 30) - .unwrap() - .and_hms_micro_opt(21, 10, 30, 527658) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 6, 2) - .unwrap() - .and_hms_micro_opt(14, 0, 36, 749872) - .unwrap(), - Utc, - ), - locality: 
String::new(), - is_available: true, - is_live: true, - replicas_leaders: 38, - replicas_leaseholders: 38, - ranges: 210, - ranges_unavailable: 0, - ranges_underreplicated: 0, - live_bytes: 3958791538, - key_bytes: 846009128, - value_bytes: 5249950302, - intent_bytes: 0, - system_bytes: 108083397, - gossiped_replicas: 210, - is_decommissioning: false, - membership: "active".to_string(), - is_draining: false, - }; - - let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, vec![expected]); - } - - #[test] - fn test_node_status_parse_multiple_lines_from_csv() { - let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live,replicas_leaders,replicas_leaseholders,ranges,ranges_unavailable,ranges_underreplicated,live_bytes,key_bytes,value_bytes,intent_bytes,system_bytes,gossiped_replicas,is_decommissioning,membership,is_draining -1,[fd00:1122:3344:101::3]:32221,[fd00:1122:3344:101::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:26.237011,2025-06-02 14:05:05.508688,,true,true,41,41,210,0,0,3967748150,846544773,5119261316,34,108060755,210,false,active,false -2,[fd00:1122:3344:102::3]:32221,[fd00:1122:3344:102::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:30.00501,2025-06-02 14:05:05.090293,,true,true,44,44,210,0,0,3967896249,846559229,5119394994,861,108017090,210,false,active,false -3,NULL,NULL,NULL,NULL,2025-05-30 21:11:45.350419,NULL,false,false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,true,decommissioned,false -4,NULL,NULL,NULL,NULL,2025-05-30 21:11:45.668157,NULL,false,false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,true,decommissioned,false -6,[fd00:1122:3344:102::21]:32221,[fd00:1122:3344:102::21]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:26.26209,2025-06-02 14:05:05.906022,,true,true,41,41,210,0,0,3967896044,846559229,5119394789,0,108016856,210,false,active,false"#; - let expected = vec![ - NodeStatus { - node_id: "1".to_string(), - address: 
"[fd00:1122:3344:101::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:101::3]:32221".parse().unwrap(), - build: "v22.1.22-29-g865aff1595".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 5, 30) - .unwrap() - .and_hms_micro_opt(21, 10, 26, 237011) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 6, 2) - .unwrap() - .and_hms_micro_opt(14, 5, 5, 508688) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - replicas_leaders: 41, - replicas_leaseholders: 41, - ranges: 210, - ranges_unavailable: 0, - ranges_underreplicated: 0, - live_bytes: 3967748150, - key_bytes: 846544773, - value_bytes: 5119261316, - intent_bytes: 34, - system_bytes: 108060755, - gossiped_replicas: 210, - is_decommissioning: false, - membership: "active".to_string(), - is_draining: false, - }, - NodeStatus { - node_id: "2".to_string(), - address: "[fd00:1122:3344:102::3]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:102::3]:32221".parse().unwrap(), - build: "v22.1.22-29-g865aff1595".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 5, 30) - .unwrap() - .and_hms_micro_opt(21, 10, 30, 5010) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 6, 2) - .unwrap() - .and_hms_micro_opt(14, 5, 5, 90293) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - replicas_leaders: 44, - replicas_leaseholders: 44, - ranges: 210, - ranges_unavailable: 0, - ranges_underreplicated: 0, - live_bytes: 3967896249, - key_bytes: 846559229, - value_bytes: 5119394994, - intent_bytes: 861, - system_bytes: 108017090, - gossiped_replicas: 210, - is_decommissioning: false, - membership: "active".to_string(), - is_draining: false, - }, - NodeStatus { - node_id: "6".to_string(), - address: 
"[fd00:1122:3344:102::21]:32221".parse().unwrap(), - sql_address: "[fd00:1122:3344:102::21]:32221".parse().unwrap(), - build: "v22.1.22-29-g865aff1595".to_string(), - started_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 5, 30) - .unwrap() - .and_hms_micro_opt(21, 10, 26, 262090) - .unwrap(), - Utc, - ), - updated_at: DateTime::from_naive_utc_and_offset( - NaiveDate::from_ymd_opt(2025, 6, 2) - .unwrap() - .and_hms_micro_opt(14, 5, 5, 906022) - .unwrap(), - Utc, - ), - locality: String::new(), - is_available: true, - is_live: true, - replicas_leaders: 41, - replicas_leaseholders: 41, - ranges: 210, - ranges_unavailable: 0, - ranges_underreplicated: 0, - live_bytes: 3967896044, - key_bytes: 846559229, - value_bytes: 5119394789, - intent_bytes: 0, - system_bytes: 108016856, - gossiped_replicas: 210, - is_decommissioning: false, - membership: "active".to_string(), - is_draining: false, - }, - ]; - - let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses.len(), expected.len()); - for (status, expected) in statuses.iter().zip(&expected) { - assert_eq!(status, expected); - } - } - - #[test] - fn test_node_decommission_parse_with_no_trailing_notes() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,true,24,true,decommissioning,false"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: true, - replicas: 24, - is_decommissioning: true, - membership: NodeMembership::Decommissioning, - is_draining: false, - notes: vec![], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - - #[test] - fn test_node_decommission_parse_with_trailing_notes() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,false,0,true,decommissioned,false - -No more data reported on target nodes. Please verify cluster health before removing the nodes. 
-"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: false, - replicas: 0, - is_decommissioning: true, - membership: NodeMembership::Decommissioned, - is_draining: false, - notes: vec![ - "No more data reported on target nodes. \ - Please verify cluster health before removing the nodes." - .to_string(), - ], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - - #[test] - fn test_node_decommission_parse_with_unexpected_membership_value() { - let input = - br#"id,is_live,replicas,is_decommissioning,membership,is_draining -6,false,0,true,foobar,false"#; - let expected = NodeDecommission { - node_id: "6".to_string(), - is_live: false, - replicas: 0, - is_decommissioning: true, - membership: NodeMembership::Unknown { value: "foobar".to_string() }, - is_draining: false, - notes: vec![], - }; - - let statuses = - NodeDecommission::parse_from_csv(input).expect("parsed input"); - assert_eq!(statuses, expected); - } - - // TODO: the proptests below should probably be fuzz targets instead to - // allow for guided fuzzing. - - #[proptest] - fn node_status_parse_doesnt_panic_on_arbitrary_input(input: Vec) { - _ = NodeStatus::parse_from_csv(&input); - } - - #[proptest] - fn node_decommission_parse_doesnt_panic_on_arbitrary_input(input: Vec) { - _ = NodeDecommission::parse_from_csv(&input); - } -} +pub mod node; diff --git a/cockroach-admin/types/src/node.rs b/cockroach-admin/types/src/node.rs new file mode 100644 index 00000000000..eb58052e21a --- /dev/null +++ b/cockroach-admin/types/src/node.rs @@ -0,0 +1,100 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Node-related types for the CockroachDB Admin API. 
+ +pub use cockroach_admin_types_versions::latest::node::*; + +/// CockroachDB Node ID (internal representation) +/// +/// This field is stored internally as a String to avoid questions +/// about size, signedness, etc - it can be treated as an arbitrary +/// unique identifier. +/// +/// Note: This is an internal type used for CLI parsing and internal +/// storage, distinct from the published API `NodeId` type in the +/// versions crate which has a different structure. +#[derive( + Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, +)] +pub struct InternalNodeId(pub String); + +impl InternalNodeId { + pub fn new(id: String) -> Self { + Self(id) + } + + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for InternalNodeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.0) + } +} + +impl std::str::FromStr for InternalNodeId { + type Err = std::convert::Infallible; + + fn from_str(s: &str) -> Result { + Ok(Self(s.to_string())) + } +} + +// When parsing the underlying InternalNodeId, we force it to be interpreted +// as a String. Without this custom Deserialize implementation, we +// encounter parsing errors when querying endpoints which return the +// node ID as an integer. 
+impl<'de> serde::Deserialize<'de> for InternalNodeId { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::{Error, Visitor}; + use std::fmt; + + struct InternalNodeIdVisitor; + + impl<'de> Visitor<'de> for InternalNodeIdVisitor { + type Value = InternalNodeId; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter + .write_str("a string or integer representing a node ID") + } + + fn visit_str(self, value: &str) -> Result + where + E: Error, + { + Ok(InternalNodeId(value.to_string())) + } + + fn visit_string(self, value: String) -> Result + where + E: Error, + { + Ok(InternalNodeId(value)) + } + + fn visit_i64(self, value: i64) -> Result + where + E: Error, + { + Ok(InternalNodeId(value.to_string())) + } + + fn visit_u64(self, value: u64) -> Result + where + E: Error, + { + Ok(InternalNodeId(value.to_string())) + } + } + + deserializer.deserialize_any(InternalNodeIdVisitor) + } +} diff --git a/cockroach-admin/types/versions/Cargo.toml b/cockroach-admin/types/versions/Cargo.toml new file mode 100644 index 00000000000..61efb575d77 --- /dev/null +++ b/cockroach-admin/types/versions/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "cockroach-admin-types-versions" +version = "0.1.0" +edition.workspace = true +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +chrono.workspace = true +csv.workspace = true +omicron-uuid-kinds.workspace = true +omicron-workspace-hack.workspace = true +schemars.workspace = true +serde.workspace = true +thiserror.workspace = true + +[dev-dependencies] +proptest.workspace = true +test-strategy.workspace = true diff --git a/cockroach-admin/types/versions/src/impls/mod.rs b/cockroach-admin/types/versions/src/impls/mod.rs new file mode 100644 index 00000000000..7d609941186 --- /dev/null +++ b/cockroach-admin/types/versions/src/impls/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Functional code for the latest versions of types. + +pub(crate) mod node; diff --git a/cockroach-admin/types/versions/src/impls/node.rs b/cockroach-admin/types/versions/src/impls/node.rs new file mode 100644 index 00000000000..885539ff6ac --- /dev/null +++ b/cockroach-admin/types/versions/src/impls/node.rs @@ -0,0 +1,546 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Functional code for node-related types. + +use crate::latest::node::{NodeDecommission, NodeMembership, NodeStatus}; +use chrono::{DateTime, NaiveDateTime, Utc}; +use csv::StringRecord; +use serde::de; +use std::io; + +#[derive(Debug, thiserror::Error)] +pub enum ParseError { + #[error("failed to parse `cockroach node status` output")] + NodeStatus(#[from] NodeStatusError), + #[error("failed to parse `cockroach decommission` output")] + Decommission(#[from] DecommissionError), +} + +#[derive(Debug, thiserror::Error)] +pub enum NodeStatusError { + #[error("missing `membership` header (found: {0:?})")] + MissingMembershipHeader(StringRecord), + #[error("failed to parse header row")] + ParseHeaderRow(#[source] csv::Error), + #[error("failed to parse record row")] + ParseRecordRow(#[source] csv::Error), + #[error("fewer fields than expected in status row: {0:?}")] + StatusRowMissingFields(StringRecord), + #[error("failed to parse node status row {row:?}")] + ParseStatusRow { + row: StringRecord, + #[source] + err: csv::Error, + }, +} + +#[derive(Debug, thiserror::Error)] +pub enum DecommissionError { + #[error("missing output row after headers")] + MissingOutputRow, + #[error("failed to parse row")] + ParseRow(#[from] csv::Error), +} + +impl NodeStatus { + pub fn parse_from_csv(data: &[u8]) -> Result, ParseError> { + let 
mut statuses = Vec::new(); + let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); + + // We can't naively deserialize every record as a `CliNodeStatus` + // directly, because the `node status --all` flag to get all details + // also causes cockroach to emit statuses for decommissioned nodes, + // which report `NULL` for most fields. For now, we want to skip + // decommissioned nodes entirely, so we'll parse each record + // individually after checking first for whether it's decommissioned. + let headers = + reader.headers().map_err(NodeStatusError::ParseHeaderRow)?.clone(); + let Some(membership_idx) = + headers.iter().position(|h| h == "membership") + else { + return Err( + NodeStatusError::MissingMembershipHeader(headers).into() + ); + }; + + for row in reader.into_records() { + let row = row.map_err(NodeStatusError::ParseRecordRow)?; + + // Skip decommissioned nodes without attempting to parse them + // further, as noted above + let Some(membership) = row.get(membership_idx) else { + return Err(NodeStatusError::StatusRowMissingFields(row).into()); + }; + if membership == "decommissioned" { + continue; + } + + let record: CliNodeStatus = + row.deserialize(Some(&headers)).map_err(|err| { + NodeStatusError::ParseStatusRow { row: row.clone(), err } + })?; + statuses.push(record.into()); + } + + Ok(statuses) + } +} + +// Slightly different `NodeStatus` that matches what we get from `cockroach`: +// timestamps are a fixed format with no timezone (but are actually UTC), so we +// have a custom deserializer, and the ID column is `id` instead of `node_id`. 
+#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize)] +struct CliNodeStatus { + id: String, + address: std::net::SocketAddr, + sql_address: std::net::SocketAddr, + build: String, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + started_at: DateTime, + #[serde(deserialize_with = "parse_cockroach_cli_timestamp")] + updated_at: DateTime, + locality: String, + is_available: bool, + is_live: bool, + replicas_leaders: i64, + replicas_leaseholders: i64, + ranges: i64, + ranges_unavailable: i64, + ranges_underreplicated: i64, + live_bytes: i64, + key_bytes: i64, + value_bytes: i64, + intent_bytes: i64, + system_bytes: i64, + gossiped_replicas: i64, + is_decommissioning: bool, + membership: String, + is_draining: bool, +} + +impl From for NodeStatus { + fn from(cli: CliNodeStatus) -> Self { + Self { + node_id: cli.id, + address: cli.address, + sql_address: cli.sql_address, + build: cli.build, + started_at: cli.started_at, + updated_at: cli.updated_at, + locality: cli.locality, + is_available: cli.is_available, + is_live: cli.is_live, + replicas_leaders: cli.replicas_leaders, + replicas_leaseholders: cli.replicas_leaseholders, + ranges: cli.ranges, + ranges_unavailable: cli.ranges_unavailable, + ranges_underreplicated: cli.ranges_underreplicated, + live_bytes: cli.live_bytes, + key_bytes: cli.key_bytes, + value_bytes: cli.value_bytes, + intent_bytes: cli.intent_bytes, + system_bytes: cli.system_bytes, + gossiped_replicas: cli.gossiped_replicas, + is_decommissioning: cli.is_decommissioning, + membership: cli.membership, + is_draining: cli.is_draining, + } + } +} + +fn parse_cockroach_cli_timestamp<'de, D>( + d: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + struct CockroachTimestampVisitor; + impl de::Visitor<'_> for CockroachTimestampVisitor { + type Value = DateTime; + + fn expecting( + &self, + formatter: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + formatter.write_str("a Cockroach CLI timestamp") + } + + fn 
visit_str(self, v: &str) -> Result + where + E: de::Error, + { + let dt = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.f") + .map_err(E::custom)?; + Ok(DateTime::from_naive_utc_and_offset(dt, Utc)) + } + } + + d.deserialize_str(CockroachTimestampVisitor) +} + +impl NodeDecommission { + pub fn parse_from_csv(data: &[u8]) -> Result { + // Reading the node decommission output is awkward because it isn't + // fully CSV. We expect a CSV header, then a row for each node being + // decommissioned, then (maybe) a blank line followed by a note that is + // just a string, not related to the initial CSV data. Even though the + // CLI supports decommissioning more than one node in one invocation, we + // only provide an API to decommission a single node, so we expect: + // + // 1. The CSV header line + // 2. The one row of CSV data + // 3. Trailing notes + // + // We'll collect the notes as a separate field and return them to our + // caller. + + // First we'll run the data through a csv::Reader; this will pull out + // the header row and the one row of data. + let mut reader = csv::Reader::from_reader(io::Cursor::new(data)); + let record: CliNodeDecommission = reader + .deserialize() + .next() + .ok_or_else(|| DecommissionError::MissingOutputRow)? + .map_err(DecommissionError::ParseRow)?; + + // Get the position where the reader ended after that one row; we'll + // collect any remaining nonempty lines as `notes`. 
+ let extra_data = &data[reader.position().byte() as usize..];
+ let mut notes = Vec::new();
+ for line in String::from_utf8_lossy(extra_data).lines() {
+ let line = line.trim();
+ if !line.is_empty() {
+ notes.push(line.to_string());
+ }
+ }
+
+ Ok(Self::from((record, notes)))
+ }
+}
+
+// Slightly different `NodeDecommission` that matches what we get from
+// `cockroach`: this omits `notes`, which isn't really a CSV field at all, but
+// is instead where we collect the non-CSV string output from the CLI, uses
+// a custom deserializer for `membership` to handle unknown variants, and the ID
+// column is `id` instead of `node_id`.
+#[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize)]
+struct CliNodeDecommission {
+ pub id: String,
+ pub is_live: bool,
+ pub replicas: i64,
+ pub is_decommissioning: bool,
+ #[serde(deserialize_with = "parse_node_membership")]
+ pub membership: NodeMembership,
+ pub is_draining: bool,
+}
+
+impl From<(CliNodeDecommission, Vec)> for NodeDecommission {
+ fn from((cli, notes): (CliNodeDecommission, Vec)) -> Self {
+ Self {
+ node_id: cli.id,
+ is_live: cli.is_live,
+ replicas: cli.replicas,
+ is_decommissioning: cli.is_decommissioning,
+ membership: cli.membership,
+ is_draining: cli.is_draining,
+ notes,
+ }
+ }
+}
+
+fn parse_node_membership<'de, D>(d: D) -> Result
+where
+ D: serde::Deserializer<'de>,
+{
+ struct CockroachNodeMembershipVisitor;
+
+ impl de::Visitor<'_> for CockroachNodeMembershipVisitor {
+ type Value = NodeMembership;
+
+ fn expecting(
+ &self,
+ formatter: &mut std::fmt::Formatter,
+ ) -> std::fmt::Result {
+ formatter.write_str("a Cockroach node membership string")
+ }
+
+ fn visit_str(self, v: &str) -> Result
+ where
+ E: de::Error,
+ {
+ let membership = match v {
+ "active" => NodeMembership::Active,
+ "decommissioning" => NodeMembership::Decommissioning,
+ "decommissioned" => NodeMembership::Decommissioned,
+ _ => NodeMembership::Unknown { value: v.to_string() },
+ };
+ Ok(membership)
+ }
+ }
+
+ 
d.deserialize_str(CockroachNodeMembershipVisitor) +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDate; + use test_strategy::proptest; + + #[test] + fn test_node_status_parse_single_line_from_csv() { + let input = br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live,replicas_leaders,replicas_leaseholders,ranges,ranges_unavailable,ranges_underreplicated,live_bytes,key_bytes,value_bytes,intent_bytes,system_bytes,gossiped_replicas,is_decommissioning,membership,is_draining +5,[fd00:1122:3344:103::3]:32221,[fd00:1122:3344:103::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:30.527658,2025-06-02 14:00:36.749872,,true,true,38,38,210,0,0,3958791538,846009128,5249950302,0,108083397,210,false,active,false"#; + let expected = NodeStatus { + node_id: "5".to_string(), + address: "[fd00:1122:3344:103::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:103::3]:32221".parse().unwrap(), + build: "v22.1.22-29-g865aff1595".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 5, 30) + .unwrap() + .and_hms_micro_opt(21, 10, 30, 527658) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 6, 2) + .unwrap() + .and_hms_micro_opt(14, 0, 36, 749872) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + replicas_leaders: 38, + replicas_leaseholders: 38, + ranges: 210, + ranges_unavailable: 0, + ranges_underreplicated: 0, + live_bytes: 3958791538, + key_bytes: 846009128, + value_bytes: 5249950302, + intent_bytes: 0, + system_bytes: 108083397, + gossiped_replicas: 210, + is_decommissioning: false, + membership: "active".to_string(), + is_draining: false, + }; + + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, vec![expected]); + } + + #[test] + fn test_node_status_parse_multiple_lines_from_csv() { + let input = 
br#"id,address,sql_address,build,started_at,updated_at,locality,is_available,is_live,replicas_leaders,replicas_leaseholders,ranges,ranges_unavailable,ranges_underreplicated,live_bytes,key_bytes,value_bytes,intent_bytes,system_bytes,gossiped_replicas,is_decommissioning,membership,is_draining +1,[fd00:1122:3344:101::3]:32221,[fd00:1122:3344:101::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:26.237011,2025-06-02 14:05:05.508688,,true,true,41,41,210,0,0,3967748150,846544773,5119261316,34,108060755,210,false,active,false +2,[fd00:1122:3344:102::3]:32221,[fd00:1122:3344:102::3]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:30.00501,2025-06-02 14:05:05.090293,,true,true,44,44,210,0,0,3967896249,846559229,5119394994,861,108017090,210,false,active,false +3,NULL,NULL,NULL,NULL,2025-05-30 21:11:45.350419,NULL,false,false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,true,decommissioned,false +4,NULL,NULL,NULL,NULL,2025-05-30 21:11:45.668157,NULL,false,false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,true,decommissioned,false +6,[fd00:1122:3344:102::21]:32221,[fd00:1122:3344:102::21]:32221,v22.1.22-29-g865aff1595,2025-05-30 21:10:26.26209,2025-06-02 14:05:05.906022,,true,true,41,41,210,0,0,3967896044,846559229,5119394789,0,108016856,210,false,active,false"#; + let expected = vec![ + NodeStatus { + node_id: "1".to_string(), + address: "[fd00:1122:3344:101::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:101::3]:32221".parse().unwrap(), + build: "v22.1.22-29-g865aff1595".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 5, 30) + .unwrap() + .and_hms_micro_opt(21, 10, 26, 237011) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 6, 2) + .unwrap() + .and_hms_micro_opt(14, 5, 5, 508688) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + replicas_leaders: 41, + replicas_leaseholders: 41, + ranges: 210, 
+ ranges_unavailable: 0, + ranges_underreplicated: 0, + live_bytes: 3967748150, + key_bytes: 846544773, + value_bytes: 5119261316, + intent_bytes: 34, + system_bytes: 108060755, + gossiped_replicas: 210, + is_decommissioning: false, + membership: "active".to_string(), + is_draining: false, + }, + NodeStatus { + node_id: "2".to_string(), + address: "[fd00:1122:3344:102::3]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:102::3]:32221".parse().unwrap(), + build: "v22.1.22-29-g865aff1595".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 5, 30) + .unwrap() + .and_hms_micro_opt(21, 10, 30, 5010) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 6, 2) + .unwrap() + .and_hms_micro_opt(14, 5, 5, 90293) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + replicas_leaders: 44, + replicas_leaseholders: 44, + ranges: 210, + ranges_unavailable: 0, + ranges_underreplicated: 0, + live_bytes: 3967896249, + key_bytes: 846559229, + value_bytes: 5119394994, + intent_bytes: 861, + system_bytes: 108017090, + gossiped_replicas: 210, + is_decommissioning: false, + membership: "active".to_string(), + is_draining: false, + }, + NodeStatus { + node_id: "6".to_string(), + address: "[fd00:1122:3344:102::21]:32221".parse().unwrap(), + sql_address: "[fd00:1122:3344:102::21]:32221".parse().unwrap(), + build: "v22.1.22-29-g865aff1595".to_string(), + started_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 5, 30) + .unwrap() + .and_hms_micro_opt(21, 10, 26, 262090) + .unwrap(), + Utc, + ), + updated_at: DateTime::from_naive_utc_and_offset( + NaiveDate::from_ymd_opt(2025, 6, 2) + .unwrap() + .and_hms_micro_opt(14, 5, 5, 906022) + .unwrap(), + Utc, + ), + locality: String::new(), + is_available: true, + is_live: true, + replicas_leaders: 41, + replicas_leaseholders: 41, + ranges: 210, + ranges_unavailable: 0, + 
ranges_underreplicated: 0, + live_bytes: 3967896044, + key_bytes: 846559229, + value_bytes: 5119394789, + intent_bytes: 0, + system_bytes: 108016856, + gossiped_replicas: 210, + is_decommissioning: false, + membership: "active".to_string(), + is_draining: false, + }, + ]; + + let statuses = NodeStatus::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses.len(), expected.len()); + for (status, expected) in statuses.iter().zip(&expected) { + assert_eq!(status, expected); + } + } + + #[test] + fn test_node_decommission_parse_with_no_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,true,24,true,decommissioning,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: true, + replicas: 24, + is_decommissioning: true, + membership: NodeMembership::Decommissioning, + is_draining: false, + notes: vec![], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_trailing_notes() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,decommissioned,false + +No more data reported on target nodes. Please verify cluster health before removing the nodes. +"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Decommissioned, + is_draining: false, + notes: vec![ + "No more data reported on target nodes. \ + Please verify cluster health before removing the nodes." 
+ .to_string(), + ], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + #[test] + fn test_node_decommission_parse_with_unexpected_membership_value() { + let input = + br#"id,is_live,replicas,is_decommissioning,membership,is_draining +6,false,0,true,foobar,false"#; + let expected = NodeDecommission { + node_id: "6".to_string(), + is_live: false, + replicas: 0, + is_decommissioning: true, + membership: NodeMembership::Unknown { value: "foobar".to_string() }, + is_draining: false, + notes: vec![], + }; + + let statuses = + NodeDecommission::parse_from_csv(input).expect("parsed input"); + assert_eq!(statuses, expected); + } + + // TODO: the proptests below should probably be fuzz targets instead to + // allow for guided fuzzing. + + #[proptest] + fn node_status_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeStatus::parse_from_csv(&input); + } + + #[proptest] + fn node_decommission_parse_doesnt_panic_on_arbitrary_input(input: Vec) { + _ = NodeDecommission::parse_from_csv(&input); + } +} diff --git a/cockroach-admin/types/versions/src/initial/mod.rs b/cockroach-admin/types/versions/src/initial/mod.rs new file mode 100644 index 00000000000..048d6016779 --- /dev/null +++ b/cockroach-admin/types/versions/src/initial/mod.rs @@ -0,0 +1,7 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Version `INITIAL` of the CockroachDB Admin API. + +pub mod node; diff --git a/cockroach-admin/types/versions/src/initial/node.rs b/cockroach-admin/types/versions/src/initial/node.rs new file mode 100644 index 00000000000..daa638bd4d5 --- /dev/null +++ b/cockroach-admin/types/versions/src/initial/node.rs @@ -0,0 +1,98 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Node-related types for the CockroachDB Admin API. + +use chrono::{DateTime, Utc}; +use omicron_uuid_kinds::OmicronZoneUuid; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::net::SocketAddr; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct ClusterNodeStatus { + pub all_nodes: Vec, +} + +/// CockroachDB Node ID +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct LocalNodeId { + /// The ID of this Omicron zone. + /// + /// This is included to ensure correctness even if a socket address on a + /// sled is reused for a different zone; if our caller is trying to + /// determine the node ID for a particular Omicron CockroachDB zone, they'll + /// contact us by socket address. We include our zone ID in the response for + /// their confirmation that we are the zone they intended to contact. + pub zone_id: OmicronZoneUuid, + // CockroachDB node IDs are integers, in practice, but our use of them is as + // input and output to the `cockroach` CLI. We use a string which is a bit + // more natural (no need to parse CLI output or stringify an ID to send it + // as input) and leaves open the door for the format to change in the + // future. 
+ pub node_id: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeId { + pub node_id: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeStatus { + // TODO use NodeId + pub node_id: String, + pub address: SocketAddr, + pub sql_address: SocketAddr, + pub build: String, + pub started_at: DateTime, + pub updated_at: DateTime, + pub locality: String, + pub is_available: bool, + pub is_live: bool, + pub replicas_leaders: i64, + pub replicas_leaseholders: i64, + pub ranges: i64, + pub ranges_unavailable: i64, + pub ranges_underreplicated: i64, + pub live_bytes: i64, + pub key_bytes: i64, + pub value_bytes: i64, + pub intent_bytes: i64, + pub system_bytes: i64, + pub gossiped_replicas: i64, + pub is_decommissioning: bool, + pub membership: String, + pub is_draining: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub struct NodeDecommission { + pub node_id: String, + pub is_live: bool, + pub replicas: i64, + pub is_decommissioning: bool, + pub membership: NodeMembership, + pub is_draining: bool, + pub notes: Vec, +} + +// The cockroach CLI and `crdb_internal.gossip_liveness` table use a string for +// node membership, but there are only three meaningful values per +// https://github.com/cockroachdb/cockroach/blob/0c92c710d2baadfdc5475be8d2238cf26cb152ca/pkg/kv/kvserver/liveness/livenesspb/liveness.go#L96, +// so we'll convert into a Rust enum and leave the "unknown" case for future +// changes that expand or reword these values. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "state", rename_all = "lowercase")] +pub enum NodeMembership { + Active, + Decommissioning, + Decommissioned, + Unknown { value: String }, +} diff --git a/cockroach-admin/types/versions/src/latest.rs b/cockroach-admin/types/versions/src/latest.rs new file mode 100644 index 00000000000..0c2fc852b29 --- /dev/null +++ b/cockroach-admin/types/versions/src/latest.rs @@ -0,0 +1,18 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Re-exports of the latest versions of all published types. + +pub mod node { + pub use crate::v1::node::ClusterNodeStatus; + pub use crate::v1::node::LocalNodeId; + pub use crate::v1::node::NodeDecommission; + pub use crate::v1::node::NodeId; + pub use crate::v1::node::NodeMembership; + pub use crate::v1::node::NodeStatus; + + pub use crate::impls::node::DecommissionError; + pub use crate::impls::node::NodeStatusError; + pub use crate::impls::node::ParseError; +} diff --git a/cockroach-admin/types/versions/src/lib.rs b/cockroach-admin/types/versions/src/lib.rs new file mode 100644 index 00000000000..0e8af5675ef --- /dev/null +++ b/cockroach-admin/types/versions/src/lib.rs @@ -0,0 +1,35 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Versioned types for the CockroachDB Admin API. +//! +//! # Adding a new API version +//! +//! When adding a new API version N with added or changed types: +//! +//! 1. Create `/mod.rs`, where `` is the lowercase +//! form of the new version's identifier, as defined in the API trait's +//! `api_versions!` macro. +//! +//! 2. Add to the end of this list: +//! +//! ```rust,ignore +//! 
#[path = "/mod.rs"] +//! pub mod vN; +//! ``` +//! +//! 3. Add your types to the new module, mirroring the module structure from +//! earlier versions. +//! +//! 4. Update `latest.rs` with new and updated types from the new version. +//! +//! For more information, see the [detailed guide] and [RFD 619]. +//! +//! [detailed guide]: https://github.com/oxidecomputer/dropshot-api-manager/blob/main/guides/new-version.md +//! [RFD 619]: https://rfd.shared.oxide.computer/rfd/619 + +mod impls; +pub mod latest; +#[path = "initial/mod.rs"] +pub mod v1; diff --git a/cockroach-metrics/src/lib.rs b/cockroach-metrics/src/lib.rs index 8fc03e91365..8cb6a68b290 100644 --- a/cockroach-metrics/src/lib.rs +++ b/cockroach-metrics/src/lib.rs @@ -10,7 +10,7 @@ use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use cockroach_admin_client::Client; -use cockroach_admin_types::NodeId; +use cockroach_admin_types::node::InternalNodeId; use futures::stream::{FuturesUnordered, StreamExt}; use parallel_task_set::ParallelTaskSet; use serde::{Deserialize, Serialize}; @@ -307,7 +307,7 @@ impl CockroachClusterAdminClient { /// Fetch Prometheus metrics from all backends, returning all successful results pub async fn fetch_prometheus_metrics_from_all_nodes( &self, - ) -> Vec<(NodeId, PrometheusMetrics)> { + ) -> Vec<(InternalNodeId, PrometheusMetrics)> { let clients = self.clients.read().await; if clients.is_empty() { @@ -316,7 +316,7 @@ impl CockroachClusterAdminClient { // Collect tasks from all nodes in parallel let mut results: Vec< - Result<(NodeId, PrometheusMetrics), anyhow::Error>, + Result<(InternalNodeId, PrometheusMetrics), anyhow::Error>, > = Vec::new(); let mut tasks = ParallelTaskSet::new(); for (addr, client) in clients.iter() { @@ -332,7 +332,7 @@ impl CockroachClusterAdminClient { .with_context(|| { format!("Failed to get node ID from {}", addr,) })?; - let node_id = NodeId::new(node_id_string); + let node_id = InternalNodeId::new(node_id_string); // Then fetch the metrics let 
metrics = client @@ -386,7 +386,7 @@ impl CockroachClusterAdminClient { /// Fetch node status from all backends, returning all successful results pub async fn fetch_node_status_from_all_nodes( &self, - ) -> Vec<(NodeId, NodesResponse)> { + ) -> Vec<(InternalNodeId, NodesResponse)> { let clients = self.clients.read().await; if clients.is_empty() { @@ -394,8 +394,9 @@ impl CockroachClusterAdminClient { } // Create futures for all requests - let mut results: Vec> = - Vec::new(); + let mut results: Vec< + Result<(InternalNodeId, NodesResponse), anyhow::Error>, + > = Vec::new(); let mut tasks = ParallelTaskSet::new(); for (addr, client) in clients.iter() { let addr = *addr; @@ -410,7 +411,7 @@ impl CockroachClusterAdminClient { .with_context(|| { format!("Failed to get node ID from {}", addr,) })?; - let node_id = NodeId::new(node_id_string); + let node_id = InternalNodeId::new(node_id_string); // Then fetch the node status let status = @@ -844,7 +845,8 @@ pub struct NodesResponse { pub nodes: Vec, /// Maps node ID to liveness status #[serde(rename = "livenessByNodeId")] - pub liveness_by_node_id: std::collections::BTreeMap, + pub liveness_by_node_id: + std::collections::BTreeMap, } /// Node status information from CockroachDB /_status/nodes endpoint @@ -870,7 +872,7 @@ pub struct NodeStatus { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NodeDescriptor { #[serde(rename = "nodeId")] - pub node_id: NodeId, + pub node_id: InternalNodeId, #[serde(rename = "address")] pub address: AddressInfo, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index fba0df6b6a5..fa6703df47f 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -2920,7 +2920,7 @@ pub struct InvCockroachStatus { impl InvCockroachStatus { pub fn new( inv_collection_id: CollectionUuid, - node_id: cockroach_admin_types::NodeId, + node_id: cockroach_admin_types::node::InternalNodeId, status: &CockroachStatus, ) -> Result { Ok(Self { diff 
--git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index d71f3ee0938..dd208d8de07 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -12,7 +12,7 @@ use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use clickhouse_admin_types::ClickhouseKeeperClusterMembership; -use cockroach_admin_types::NodeId as CockroachNodeId; +use cockroach_admin_types::node::InternalNodeId as CockroachNodeId; use diesel::BoolExpressionMethods; use diesel::ExpressionMethods; use diesel::IntoSql; diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 83b893657a4..89af588d843 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -13,7 +13,7 @@ use anyhow::anyhow; use chrono::DateTime; use chrono::Utc; use clickhouse_admin_types::ClickhouseKeeperClusterMembership; -use cockroach_admin_types::NodeId; +use cockroach_admin_types::node::InternalNodeId; use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use iddqd::IdOrdMap; @@ -128,7 +128,7 @@ pub struct CollectionBuilder { sleds: IdOrdMap, clickhouse_keeper_cluster_membership: BTreeSet, - cockroach_status: BTreeMap, + cockroach_status: BTreeMap, ntp_timesync: IdOrdMap, internal_dns_generation_status: IdOrdMap, // CollectionBuilderRng is taken by value, rather than passed in as a @@ -708,7 +708,7 @@ impl CollectionBuilder { /// Record metrics from a CockroachDB node pub fn found_cockroach_metrics( &mut self, - node_id: NodeId, + node_id: InternalNodeId, metrics: PrometheusMetrics, ) { let mut status = CockroachStatus::default(); diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 9ad78cfbe13..ae49ecbbeed 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -672,7 +672,7 @@ pub fn representative() -> 
Representative { ); builder.found_cockroach_metrics( - cockroach_admin_types::NodeId::new("1".to_string()), + cockroach_admin_types::node::InternalNodeId::new("1".to_string()), PrometheusMetrics { metrics: BTreeMap::from([( "ranges_underreplicated".to_string(), diff --git a/nexus/reconfigurator/planning/tests/integration_tests/planner.rs b/nexus/reconfigurator/planning/tests/integration_tests/planner.rs index b9a06e34100..89739407faf 100644 --- a/nexus/reconfigurator/planning/tests/integration_tests/planner.rs +++ b/nexus/reconfigurator/planning/tests/integration_tests/planner.rs @@ -3571,7 +3571,7 @@ fn test_update_cockroach() { let mut result = BTreeMap::new(); for i in 1..=COCKROACHDB_REDUNDANCY { result.insert( - cockroach_admin_types::NodeId(i.to_string()), + cockroach_admin_types::node::InternalNodeId(i.to_string()), CockroachStatus { ranges_underreplicated: Some(0), liveness_live_nodes: Some(GOAL_REDUNDANCY), diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 34614d545e9..ec27089ef56 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -175,7 +175,7 @@ pub struct Collection { /// The status of our cockroachdb cluster, keyed by node identifier pub cockroach_status: - BTreeMap, + BTreeMap, /// The status of time synchronization pub ntp_timesync: IdOrdMap,