Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6570853
all_omicron_zones() -> danger_all_omicron_zones()
jgallagher Dec 11, 2025
0ea816b
add in_service_zones()
jgallagher Dec 11, 2025
9d62653
add expunged_zones()
jgallagher Dec 11, 2025
42de0a4
pruning more danger_all_omicron_zones() calls
jgallagher Dec 11, 2025
0a8b08f
split up expunged_zones()
jgallagher Dec 12, 2025
89f4ab2
break up `all_nexus_zones()`
jgallagher Dec 12, 2025
db9769e
break up `all_nexus_zones()`
jgallagher Dec 12, 2025
cb9d939
fixup after rebase
jgallagher Dec 12, 2025
91b7163
remove more danger_all_omicron_zones() uses
jgallagher Dec 15, 2025
fbf8663
add clickhouse IP expunge zone access reason
jgallagher Dec 15, 2025
5a5b079
combine two expunged_zones methods into one with a `ReadyForCleanup` arg
jgallagher Dec 15, 2025
11ce5b4
cargo fmt
jgallagher Dec 15, 2025
05c81a9
last danger_all_omicron_zones() external caller
jgallagher Dec 15, 2025
e9c005b
unused imports
jgallagher Dec 15, 2025
5e716e3
remove more internal danger_all_omicron_zones() calls
jgallagher Dec 15, 2025
62241ab
cleanup
jgallagher Dec 15, 2025
c2f79f2
documentation
jgallagher Dec 15, 2025
38a8bca
cargo fmt
jgallagher Dec 15, 2025
08531e2
rename
jgallagher Dec 15, 2025
baf0391
rename ReadyForCleanup -> ZoneRunningStatus
jgallagher Dec 16, 2025
4203003
add Blueprint::all_in_service_and_expunged_zones()
jgallagher Dec 16, 2025
940ed08
fix erroneous behavior change to `Blueprint::find_generation_for_self()`
jgallagher Dec 16, 2025
1f87152
typos
jgallagher Dec 16, 2025
4195e4f
Merge remote-tracking branch 'origin/main' into john/blueprint-expung…
jgallagher Dec 16, 2025
12cfe41
unused imports
jgallagher Dec 16, 2025
7f511d6
remove `reason` from `all_maybe_running_zones()`
jgallagher Dec 16, 2025
e217b5a
Merge remote-tracking branch 'origin/main' into john/blueprint-expung…
jgallagher Dec 16, 2025
4423eb6
Merge branch 'john/split-up-execution-cleanup' into john/blueprint-ex…
jgallagher Dec 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ use nexus_db_queries::db::pagination::paginated;
use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
use nexus_db_queries::db::queries::region_allocation;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintExpungedZoneAccessReason;
use nexus_types::deployment::BlueprintZoneDisposition;
use nexus_types::deployment::BlueprintZoneType;
use nexus_types::deployment::DiskFilter;
Expand Down Expand Up @@ -1622,16 +1623,19 @@ async fn lookup_service_info(
service_id: Uuid,
blueprint: &Blueprint,
) -> anyhow::Result<Option<ServiceInfo>> {
let Some(zone_config) = blueprint
.all_omicron_zones(BlueprintZoneDisposition::any)
.find_map(|(_sled_id, zone_config)| {
if zone_config.id.into_untyped_uuid() == service_id {
Some(zone_config)
} else {
None
}
})
else {
// We don't know anything about `service_id`; it may be in-service or it may
// be expunged. Check all the zone states.
let mut all_zones = blueprint.all_in_service_and_expunged_zones(
BlueprintExpungedZoneAccessReason::Omdb,
);

let Some(zone_config) = all_zones.find_map(|(_sled_id, zone_config)| {
if zone_config.id.into_untyped_uuid() == service_id {
Some(zone_config)
} else {
None
}
}) else {
return Ok(None);
};

Expand Down
5 changes: 2 additions & 3 deletions dev-tools/omdb/src/bin/omdb/db/db_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ use nexus_db_model::DbMetadataNexusState;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintZoneDisposition;
use omicron_common::api::external::Generation;
use omicron_uuid_kinds::BlueprintUuid;
use omicron_uuid_kinds::OmicronZoneUuid;
Expand Down Expand Up @@ -133,7 +132,7 @@ async fn get_db_metadata_nexus_rows(
];

let nexus_generation_by_zone = blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.in_service_nexus_zones()
.map(|(_, zone, nexus_zone)| (zone.id, nexus_zone.nexus_generation))
.collect::<BTreeMap<_, _>>();

Expand Down Expand Up @@ -213,7 +212,7 @@ pub async fn cmd_db_metadata_force_mark_nexus_quiesced(
.await
.context("loading current target blueprint")?;
let nexus_generation = current_target_blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.in_service_nexus_zones()
.find_map(|(_, zone, nexus_zone)| {
if zone.id == args.id {
Some(nexus_zone.nexus_generation)
Expand Down
29 changes: 9 additions & 20 deletions dev-tools/reconfigurator-cli/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,7 @@ impl ReconfiguratorSim {
builder.set_external_dns_version(parent_blueprint.external_dns_version);

// Handle zone networking setup first
for (_, zone) in parent_blueprint
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
{
for (_, zone) in parent_blueprint.in_service_zones() {
if let Some((external_ip, nic)) =
zone.zone_type.external_networking()
{
Expand Down Expand Up @@ -213,9 +211,7 @@ impl ReconfiguratorSim {
let active_nexus_gen =
state.config().active_nexus_zone_generation();
let mut active_nexus_zones = BTreeSet::new();
for (_, zone, nexus) in parent_blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
{
for (_, zone, nexus) in parent_blueprint.in_service_nexus_zones() {
if nexus.nexus_generation == active_nexus_gen {
active_nexus_zones.insert(zone.id);
}
Expand All @@ -232,9 +228,7 @@ impl ReconfiguratorSim {
let active_nexus_gen =
state.config().active_nexus_zone_generation();
let mut not_yet_nexus_zones = BTreeSet::new();
for (_, zone) in parent_blueprint
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
{
for (_, zone) in parent_blueprint.in_service_zones() {
match &zone.zone_type {
nexus_types::deployment::BlueprintZoneType::Nexus(
nexus,
Expand Down Expand Up @@ -2493,17 +2487,12 @@ fn cmd_blueprint_edit(
}
BlueprintEditCommands::BumpNexusGeneration => {
let current_generation = builder.nexus_generation();
let current_max = blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.fold(
current_generation,
|current_max, (_sled_id, _zone_config, nexus_config)| {
std::cmp::max(
nexus_config.nexus_generation,
current_max,
)
},
);
let current_max = blueprint.in_service_nexus_zones().fold(
current_generation,
|current_max, (_sled_id, _zone_config, nexus_config)| {
std::cmp::max(nexus_config.nexus_generation, current_max)
},
);
ensure!(
current_max > current_generation,
"cannot bump blueprint generation (currently \
Expand Down
14 changes: 11 additions & 3 deletions live-tests/tests/test_nexus_add_remove.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator
use nexus_reconfigurator_planning::planner::Planner;
use nexus_reconfigurator_planning::planner::PlannerRng;
use nexus_reconfigurator_preparation::PlanningInputFromDb;
use nexus_types::deployment::BlueprintExpungedZoneAccessReason;
use nexus_types::deployment::BlueprintZoneDisposition;
use nexus_types::deployment::BlueprintZoneType;
use nexus_types::deployment::PlannerConfig;
use nexus_types::deployment::SledFilter;
use nexus_types::deployment::ZoneRunningStatus;
use nexus_types::deployment::blueprint_zone_type;
use omicron_common::address::NEXUS_LOCKSTEP_PORT;
use omicron_test_utils::dev::poll::CondCheckError;
Expand Down Expand Up @@ -207,15 +209,18 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) {
.await
.expect("editing blueprint to expunge zone");
let (_, expunged_zone_config) = blueprint3
.all_omicron_zones(|_| true)
.expunged_zones(
ZoneRunningStatus::MaybeRunning,
BlueprintExpungedZoneAccessReason::Test,
)
.find(|(_sled_id, zone_config)| zone_config.id == new_zone.id)
.expect("expunged zone in new blueprint");
let BlueprintZoneDisposition::Expunged {
as_of_generation: expunged_generation,
..
} = expunged_zone_config.disposition
else {
panic!("expected expunged zone to have disposition Expunged");
unreachable!("expunged_zones() returned a non-expunged zone");
};

// At some point, we should be unable to reach this Nexus any more.
Expand Down Expand Up @@ -306,7 +311,10 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) {
// We don't need to check this here. It just provides a better error
// message if something has gone wrong up to this point.
let (_, expunged_zone_config) = new_blueprint
.all_omicron_zones(|_| true)
.expunged_zones(
ZoneRunningStatus::Shutdown,
BlueprintExpungedZoneAccessReason::Test,
)
.find(|(_sled_id, zone_config)| zone_config.id == new_zone.id)
.expect("expunged zone in new blueprint");
assert!(expunged_zone_config.disposition.is_ready_for_cleanup());
Expand Down
15 changes: 7 additions & 8 deletions live-tests/tests/test_nexus_handoff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
use nexus_reconfigurator_planning::blueprint_editor::ExternalNetworkingAllocator;
use nexus_reconfigurator_preparation::PlanningInputFromDb;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintZoneDisposition;
use nexus_types::deployment::BlueprintZoneImageSource;
use nexus_types::deployment::BlueprintZoneType;
use nexus_types::deployment::PlannerConfig;
Expand Down Expand Up @@ -69,7 +68,7 @@ async fn test_nexus_handoff(lc: &LiveTestContext) {
// there exist no Nexus zones with a generation newer than the blueprint's
// `nexus_generation`.
let new_zones = blueprint_initial
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
.in_service_zones()
.filter_map(|(_sled_id, z)| {
let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus {
nexus_generation,
Expand Down Expand Up @@ -102,7 +101,7 @@ async fn test_nexus_handoff(lc: &LiveTestContext) {
cfg: &'a blueprint_zone_type::Nexus,
}
let current_nexus_zones: BTreeMap<OmicronZoneUuid, _> = blueprint_initial
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
.in_service_zones()
.filter_map(|(sled_id, z)| {
let BlueprintZoneType::Nexus(
cfg @ blueprint_zone_type::Nexus { nexus_generation, .. },
Expand Down Expand Up @@ -215,7 +214,7 @@ async fn test_nexus_handoff(lc: &LiveTestContext) {

// Find the new Nexus zones and make clients for them.
let new_nexus_clients = blueprint_new_nexus
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.in_service_nexus_zones()
.filter_map(|(_sled_id, zone_cfg, nexus_config)| {
(nexus_config.nexus_generation == next_generation).then(|| {
(
Expand Down Expand Up @@ -495,7 +494,7 @@ async fn check_internal_dns(
// Compute what we expect to find, based on which Nexus instances in the
// blueprint have the specified generation.
let expected_nexus_addrs = blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.in_service_nexus_zones()
.filter_map(|(_sled_id, _zone_cfg, nexus_config)| {
(nexus_config.nexus_generation == active_generation)
.then_some(nexus_config.internal_address)
Expand All @@ -504,7 +503,7 @@ async fn check_internal_dns(

// Find the DNS server based on what's currently in the blueprint.
let dns_sockaddr = blueprint
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
.in_service_zones()
.find_map(|(_sled_id, zone_cfg)| {
if let BlueprintZoneType::InternalDns(
blueprint_zone_type::InternalDns { dns_address, .. },
Expand Down Expand Up @@ -559,7 +558,7 @@ async fn check_external_dns(
// Compute which Nexus instances we expect to find in external DNS based on
// what's in-service in the blueprint.
let expected_nexus_addrs = blueprint
.all_nexus_zones(BlueprintZoneDisposition::is_in_service)
.in_service_nexus_zones()
.filter_map(|(_sled_id, _zone_cfg, nexus_config)| {
(nexus_config.nexus_generation == active_generation)
.then_some(nexus_config.external_ip.ip)
Expand All @@ -568,7 +567,7 @@ async fn check_external_dns(

// Find the DNS server based on what's currently in the blueprint.
let dns_http_sockaddr = blueprint
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
.in_service_zones()
.find_map(|(_sled_id, zone_cfg)| {
if let BlueprintZoneType::ExternalDns(
blueprint_zone_type::ExternalDns { http_address, .. },
Expand Down
39 changes: 25 additions & 14 deletions nexus/db-queries/src/db/datastore/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ use nexus_db_schema::enums::HwM2SlotEnum;
use nexus_db_schema::enums::HwRotSlotEnum;
use nexus_db_schema::enums::SpTypeEnum;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintExpungedZoneAccessReason;
use nexus_types::deployment::BlueprintMetadata;
use nexus_types::deployment::BlueprintSledConfig;
use nexus_types::deployment::BlueprintSource;
Expand All @@ -90,6 +91,7 @@ use nexus_types::deployment::PendingMgsUpdateRotBootloaderDetails;
use nexus_types::deployment::PendingMgsUpdateRotDetails;
use nexus_types::deployment::PendingMgsUpdateSpDetails;
use nexus_types::deployment::PendingMgsUpdates;
use nexus_types::deployment::ZoneRunningStatus;
use nexus_types::inventory::BaseboardId;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
Expand Down Expand Up @@ -1955,22 +1957,29 @@ impl DataStore {
self.ensure_zone_external_networking_deallocated_on_connection(
&conn,
&opctx.log,
// TODO-correctness Currently the planner _does not wait_
// for zones using external IPs to be ready for cleanup
// before reassigning the external IP to a new zone, so we
// have to deallocate IPs for both "ready for cleanup" and
// "not ready for cleanup" zones. We should fix the planner,
// at which point we can operate on only "ready for cleanup"
// zones here.
//
// <https://github.com/oxidecomputer/omicron/issues/9506>
blueprint
.all_omicron_zones(|disposition| {
!disposition.is_in_service()
})
.expunged_zones(
ZoneRunningStatus::Any,
BlueprintExpungedZoneAccessReason
::DeallocateExternalNetworkingResources
)
.map(|(_sled_id, zone)| zone),
)
.await
.map_err(|e| err.bail(e))?;
self.ensure_zone_external_networking_allocated_on_connection(
&conn,
opctx,
blueprint
.all_omicron_zones(|disposition| {
disposition.is_in_service()
})
.map(|(_sled_id, zone)| zone),
blueprint.in_service_zones().map(|(_sled_id, zone)| zone),
)
.await
.map_err(|e| err.bail(e))?;
Expand Down Expand Up @@ -3312,7 +3321,7 @@ mod tests {
);
assert_eq!(blueprint1.sleds.len(), collection.sled_agents.len());
assert_eq!(
blueprint1.all_omicron_zones(BlueprintZoneDisposition::any).count(),
blueprint1.in_service_zones().count(),
collection.all_ledgered_omicron_zones().count()
);
// All zones should be in service.
Expand Down Expand Up @@ -3644,9 +3653,8 @@ mod tests {
);
assert_eq!(blueprint1.sleds.len() + 1, blueprint2.sleds.len());
assert_eq!(
blueprint1.all_omicron_zones(BlueprintZoneDisposition::any).count()
+ num_new_sled_zones,
blueprint2.all_omicron_zones(BlueprintZoneDisposition::any).count()
blueprint1.in_service_zones().count() + num_new_sled_zones,
blueprint2.in_service_zones().count()
);

// All zones should be in service.
Expand Down Expand Up @@ -4296,7 +4304,7 @@ mod tests {

// Insert an IP pool range covering the one Nexus IP.
let nexus_ip = blueprint1
.all_omicron_zones(BlueprintZoneDisposition::is_in_service)
.in_service_zones()
.find_map(|(_, zone_config)| {
zone_config
.zone_type
Expand Down Expand Up @@ -4516,7 +4524,10 @@ mod tests {

fn assert_all_zones_in_service(blueprint: &Blueprint) {
let not_in_service = blueprint
.all_omicron_zones(|disposition| !disposition.is_in_service())
.expunged_zones(
ZoneRunningStatus::Any,
BlueprintExpungedZoneAccessReason::Test,
)
.collect::<Vec<_>>();
assert!(
not_in_service.is_empty(),
Expand Down
Loading
Loading