Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ use nexus_types::internal_api::background::BlueprintPlannerStatus;
use nexus_types::internal_api::background::BlueprintRendezvousStatus;
use nexus_types::internal_api::background::DatasetsRendezvousStats;
use nexus_types::internal_api::background::EreporterStatus;
use nexus_types::internal_api::background::FmAlertStats;
use nexus_types::internal_api::background::FmRendezvousStatus;
use nexus_types::internal_api::background::InstanceReincarnationStatus;
use nexus_types::internal_api::background::InstanceUpdaterStatus;
use nexus_types::internal_api::background::InventoryLoadStatus;
Expand Down Expand Up @@ -1250,6 +1252,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"fm_sitrep_gc" => {
print_task_fm_sitrep_gc(details);
}
"fm_rendezvous" => {
print_task_fm_rendezvous(details);
}
_ => {
println!(
"warning: unknown background task: {:?} \
Expand Down Expand Up @@ -3243,6 +3248,64 @@ fn print_task_fm_sitrep_gc(details: &serde_json::Value) {
);
}

/// Prints the details reported by the `fm_rendezvous` background task.
///
/// `details` is decoded as an [`FmRendezvousStatus`]. If decoding fails, a
/// warning containing the error and the raw value is written to stderr and
/// nothing else is printed. Otherwise:
///
/// * `NoSitrep` prints a single line saying no situation report is loaded;
/// * `Executed` prints the sitrep ID, then hands the alert statistics to
///   `display_fm_alert_stats`.
fn print_task_fm_rendezvous(details: &serde_json::Value) {
    // Decode first so that the reporting logic below only deals with a
    // successfully parsed status.
    let status =
        match serde_json::from_value::<FmRendezvousStatus>(details.clone()) {
            Ok(status) => status,
            Err(error) => {
                eprintln!(
                    "warning: failed to interpret task details: {:?}: {:?}",
                    error, details
                );
                return;
            }
        };

    match status {
        FmRendezvousStatus::NoSitrep => {
            println!(" no FM situation report loaded");
        }
        FmRendezvousStatus::Executed { sitrep_id, alerts } => {
            println!(" current sitrep: {sitrep_id}");
            display_fm_alert_stats(&alerts);
        }
    }
}

/// Prints a table summarizing the alert counters in `stats`.
///
/// The table has one row each for the total number of alerts requested, the
/// number requested in the current sitrep, the number created, the number
/// considered "already created", and the number of errors. Each error
/// message is then printed on its own line.
///
/// The "already created" figure is not stored in [`FmAlertStats`]; it is
/// derived here as `total_alerts_requested - alerts_created - errors.len()`.
fn display_fm_alert_stats(stats: &FmAlertStats) {
    let FmAlertStats {
        total_alerts_requested,
        current_sitrep_alerts_requested,
        alerts_created,
        errors,
    } = stats;

    // These counters come from deserialized task status, so they are not
    // guaranteed to be mutually consistent. Use saturating arithmetic so that
    // an unexpected combination (created + errors > total) shows up as 0
    // rather than panicking (debug builds) or wrapping around to a huge
    // number (release builds).
    let already_created = total_alerts_requested
        .saturating_sub(*alerts_created)
        .saturating_sub(errors.len());

    // Row labels. The label column is sized to the longest label plus one
    // column of padding.
    const REQUESTED: &str = "alerts requested:";
    const REQUESTED_THIS_SITREP: &str = " requested in this sitrep:";
    const CREATED: &str = " created in this activation:";
    const ALREADY_CREATED: &str = " already created:";
    const ERRORS: &str = " errors:";
    const WIDTH: usize = const_max_len(&[
        REQUESTED,
        REQUESTED_THIS_SITREP,
        CREATED,
        ALREADY_CREATED,
        ERRORS,
    ]) + 1;
    // Counts are right-aligned in a column of this width.
    const NUM_WIDTH: usize = 4;

    println!(" {REQUESTED:<WIDTH$}{total_alerts_requested:>NUM_WIDTH$}");
    println!(
        " {REQUESTED_THIS_SITREP:<WIDTH$}{:>NUM_WIDTH$}",
        current_sitrep_alerts_requested
    );
    println!(" {CREATED:<WIDTH$}{alerts_created:>NUM_WIDTH$}");
    println!(" {ALREADY_CREATED:<WIDTH$}{already_created:>NUM_WIDTH$}");
    // The errors row is prefixed with whatever `warn_if_nonzero` returns for
    // the error count.
    println!(
        "{} {ERRORS:<WIDTH$}{:>NUM_WIDTH$}",
        warn_if_nonzero(errors.len()),
        errors.len()
    );
    for error in errors {
        println!(" > {error}");
    }
}

const ERRICON: &str = "/!\\";

fn warn_if_nonzero(n: usize) -> &'static str {
Expand Down
15 changes: 15 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ task: "external_endpoints"
on each one


task: "fm_rendezvous"
updates externally visible database tables to match the current fault
management sitrep


task: "fm_sitrep_gc"
garbage collects fault management situation reports

Expand Down Expand Up @@ -332,6 +337,11 @@ task: "external_endpoints"
on each one


task: "fm_rendezvous"
updates externally visible database tables to match the current fault
management sitrep


task: "fm_sitrep_gc"
garbage collects fault management situation reports

Expand Down Expand Up @@ -552,6 +562,11 @@ task: "external_endpoints"
on each one


task: "fm_rendezvous"
updates externally visible database tables to match the current fault
management sitrep


task: "fm_sitrep_gc"
garbage collects fault management situation reports

Expand Down
17 changes: 17 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,11 @@ task: "external_endpoints"
on each one


task: "fm_rendezvous"
updates externally visible database tables to match the current fault
management sitrep


task: "fm_sitrep_gc"
garbage collects fault management situation reports

Expand Down Expand Up @@ -632,6 +637,12 @@ task: "external_endpoints"

TLS certificates: 0

task: "fm_rendezvous"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
no FM situation report loaded

task: "fm_sitrep_gc"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down Expand Up @@ -1200,6 +1211,12 @@ task: "external_endpoints"

TLS certificates: 0

task: "fm_rendezvous"
configured period: every <REDACTED_DURATION>m
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
no FM situation report loaded

task: "fm_sitrep_gc"
configured period: every <REDACTED_DURATION>s
last completed activation: <REDACTED ITERATIONS>, triggered by <TRIGGERED_BY_REDACTED>
Expand Down
11 changes: 11 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,11 @@ pub struct FmTasksConfig {
/// garbage collects unneeded fault management sitreps in the database.
#[serde_as(as = "DurationSeconds<u64>")]
pub sitrep_gc_period_secs: Duration,
/// period (in seconds) for periodic activations of the background task that
/// updates externally-visible database tables to match the current situation
/// report.
#[serde_as(as = "DurationSeconds<u64>")]
pub rendezvous_period_secs: Duration,
}

impl Default for FmTasksConfig {
Expand All @@ -940,6 +945,9 @@ impl Default for FmTasksConfig {
// time the current sitrep changes, and activating it more
// frequently won't make things more responsive.
sitrep_gc_period_secs: Duration::from_secs(600),
// This, too, is activated whenever a new sitrep is loaded, so the
// periodic activation interval need not be especially short.
rendezvous_period_secs: Duration::from_secs(300),
}
}
}
Expand Down Expand Up @@ -1281,6 +1289,7 @@ mod test {
fm.sitrep_gc_period_secs = 49
probe_distributor.period_secs = 50
multicast_reconciler.period_secs = 60
fm.rendezvous_period_secs = 51
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1529,6 +1538,7 @@ mod test {
fm: FmTasksConfig {
sitrep_load_period_secs: Duration::from_secs(48),
sitrep_gc_period_secs: Duration::from_secs(49),
rendezvous_period_secs: Duration::from_secs(51),
},
probe_distributor: ProbeDistributorConfig {
period_secs: Duration::from_secs(50),
Expand Down Expand Up @@ -1640,6 +1650,7 @@ mod test {
fm.sitrep_load_period_secs = 45
fm.sitrep_gc_period_secs = 46
probe_distributor.period_secs = 47
fm.rendezvous_period_secs = 48
multicast_reconciler.period_secs = 60

[default_region_allocation_strategy]
Expand Down
1 change: 1 addition & 0 deletions nexus/background-task-interface/src/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub struct BackgroundTasks {
pub task_webhook_deliverator: Activator,
pub task_sp_ereport_ingester: Activator,
pub task_reconfigurator_config_loader: Activator,
pub task_fm_rendezvous: Activator,
pub task_fm_sitrep_loader: Activator,
pub task_fm_sitrep_gc: Activator,
pub task_probe_distributor: Activator,
Expand Down
Loading
Loading