From a10307087db73879d41911f463f735613d6ffa54 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Mon, 15 Dec 2025 21:06:01 +0100 Subject: [PATCH 01/30] Add FUNDING.md --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..4e35480b3 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: reinterpretcat \ No newline at end of file From ab5ae8adcfc1d99c7dd0f1413beb5e597af74772 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 10:44:35 +0100 Subject: [PATCH 02/30] Add test to reproduce crash --- .../tests/features/breaks/required_break.rs | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/vrp-pragmatic/tests/features/breaks/required_break.rs b/vrp-pragmatic/tests/features/breaks/required_break.rs index c6e31914f..d8e52d3a3 100644 --- a/vrp-pragmatic/tests/features/breaks/required_break.rs +++ b/vrp-pragmatic/tests/features/breaks/required_break.rs @@ -1,3 +1,4 @@ +use crate::format::Location; use crate::format::problem::*; use crate::format_time; use crate::helpers::*; @@ -198,7 +199,6 @@ fn can_skip_break_if_it_is_after_start_before_end_range() { assert!(get_ids_from_tour(&solution.tours[0]).iter().flatten().all(|id| id != "break")); } -// TODO check exact and offset use cases #[test] fn can_reschedule_break_early_from_transport_to_activity() { let is_open = true; @@ -252,3 +252,82 @@ fn can_reschedule_break_early_from_transport_to_activity() { .build() ); } + +#[test] +fn can_handle_required_break_with_infeasible_sequence_relation() { + let create_test_job = |index: usize, duration: f64, times: (String, String)| Job { + services: Some(vec![JobTask { + places: vec![JobPlace { + location: Location::Reference { index }, + duration, + times: Some(vec![vec![times.0, times.1]]), + tag: None, + }], + demand: None, + order: None, + }]), + ..create_job(index.to_string().as_str()) + }; + + let problem = Problem { + plan: Plan { + jobs: vec![ + create_test_job(0, 10800., (format_time(0.), format_time(86399.))), + create_test_job(1, 3600., (format_time(81000.), format_time(81000.))), + create_test_job(2, 1800., (format_time(86400. + 900.), format_time(86400. + 900.))), + create_test_job(3, 5400., (format_time(75600.), format_time(75600.))), + create_test_job(4, 1800., (format_time(86400. + 2700.), format_time(86400. + 2700.))), + ], + relations: Some(vec![Relation { + type_field: RelationType::Sequence, + jobs: to_strings(vec!["3", "1", "2", "4"]), + vehicle_id: "my_vehicle_1".to_string(), + shift_index: Some(0), + }]), + ..create_empty_plan() + }, + fleet: Fleet { + vehicles: vec![VehicleType { + shifts: vec![VehicleShift { + start: ShiftStart { + earliest: format_time(86400. + 28800.), + latest: Some(format_time(86400. + 28800.)), + location: Location::Reference { index: 5 }, + }, + end: Some(ShiftEnd { + earliest: None, + latest: format_time(86400. + 57600.), + location: Location::Reference { index: 5 }, + }), + breaks: Some(vec![VehicleBreak::Required { + time: VehicleRequiredBreakTime::OffsetTime { earliest: 15303., latest: 15303. 
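+                            // NOTE: an exact-offset break (earliest == latest) chosen so that it
+                            // conflicts with the strict sequence relation above - this combination
+                            // is what reproduces the crash this test targets.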
}, + duration: 1800., + }]), + ..create_default_vehicle_shift() + }], + ..create_default_vehicle_type() + }], + ..create_default_fleet() + }, + ..create_empty_problem() + }; + + let matrix = Matrix { + profile: Some("car".to_string()), + timestamp: None, + travel_times: vec![ + 0, 635, 24, 580, 27, 2232, 625, 0, 650, 76, 653, 2507, 24, 660, 0, 605, 3, 2257, 570, 95, 595, 0, 598, + 2449, 27, 663, 3, 608, 0, 2260, 2232, 2545, 2257, 2515, 2260, 0, + ], + distances: vec![ + 0, 8888, 192, 8510, 215, 52931, 8896, 0, 9088, 450, 9111, 56579, 192, 9080, 0, 8702, 23, 53123, 8518, 450, + 8710, 0, 8733, 60163, 215, 9103, 23, 8725, 0, 53146, 52996, 56684, 53188, 60477, 53211, 0, + ], + error_codes: None, + }; + + let solution = solve_with_metaheuristic(problem, Some(vec![matrix])); + + // Basic assertion - no crash, solution should exist and have at least one tour + assert!(!solution.tours.is_empty()); +} From 110a8125b11f0515cc808a54907d757df314f2ea Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 12:31:44 +0100 Subject: [PATCH 03/30] Fix crash with required break and relations --- .../enablers/only_vehicle_activity_cost.rs | 5 +++-- .../construction/enablers/reserved_time.rs | 19 ++++++++++------- .../construction/enablers/schedule_update.rs | 5 +++-- .../src/construction/features/fast_service.rs | 3 ++- .../src/construction/features/transport.rs | 21 ++++++++++++++----- vrp-core/src/models/problem/costs.rs | 17 +++++++++------ .../tests/helpers/models/problem/costs.rs | 5 +++-- .../enablers/reserved_time_test.rs | 1 + .../tests/features/breaks/required_break.rs | 7 +++++-- 9 files changed, 55 insertions(+), 28 deletions(-) diff --git a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs index 2f9a17816..9e884c65d 100644 --- a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs +++ b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs @@ -2,6 +2,7 @@ use crate::models::common::{Cost, Timestamp}; use crate::models::problem::{ActivityCost, SimpleActivityCost}; use crate::models::solution::Activity; use crate::models::solution::Route; +use std::ops::ControlFlow; /// Uses costs only for a vehicle ignoring costs of a driver. 
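 /// Departure and arrival estimates are delegated to an inner `SimpleActivityCost`,
 /// so the `ControlFlow` semantics match the default implementation.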
#[derive(Default)] @@ -19,11 +20,11 @@ impl ActivityCost for OnlyVehicleActivityCost { waiting * actor.vehicle.costs.per_waiting_time + service * actor.vehicle.costs.per_service_time } - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/src/construction/enablers/reserved_time.rs b/vrp-core/src/construction/enablers/reserved_time.rs index 860cacc13..dc7eb0613 100644 --- a/vrp-core/src/construction/enablers/reserved_time.rs +++ b/vrp-core/src/construction/enablers/reserved_time.rs @@ -5,8 +5,9 @@ mod reserved_time_test; use crate::models::common::*; use crate::models::problem::{ActivityCost, Actor, TransportCost, TravelTime}; use crate::models::solution::{Activity, Route}; -use rosomaxa::prelude::{Float, GenericError}; +use rosomaxa::prelude::GenericError; use std::collections::HashMap; +use std::ops::ControlFlow; use std::sync::Arc; /// Represent a reserved time span entity. @@ -54,12 +55,12 @@ impl DynamicActivityCost { } impl ActivityCost for DynamicActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { let activity_start = arrival.max(activity.place.time.start); let departure = activity_start + activity.place.duration; let schedule = TimeWindow::new(arrival, departure); - (self.reserved_times_fn)(route, &schedule).map_or(departure, |reserved_time| { + (self.reserved_times_fn)(route, &schedule).map_or(ControlFlow::Continue(departure), |reserved_time| { // NOTE we ignore reserved_time.time.start and consider the latest possible time only let reserved_tw = &reserved_time.time; let reserved_tw = TimeWindow::new(reserved_tw.end, reserved_tw.end + reserved_time.duration); @@ -81,19 +82,21 @@ impl ActivityCost for DynamicActivityCost { if activity_start + extra_duration > activity.place.time.end { // TODO this branch is the reason why departure rescheduling is disabled. 
// theoretically, rescheduling should be aware somehow about dynamic costs - Float::MAX + ControlFlow::Break(departure + extra_duration) } else { - departure + extra_duration + ControlFlow::Continue(departure + extra_duration) } }) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { let arrival = activity.place.time.end.min(departure - activity.place.duration); let schedule = TimeWindow::new(arrival, departure); - (self.reserved_times_fn)(route, &schedule) - .map_or(arrival, |reserved_time| (arrival - reserved_time.duration).max(activity.place.time.start)) + let value = (self.reserved_times_fn)(route, &schedule) + .map_or(arrival, |reserved_time| (arrival - reserved_time.duration).max(activity.place.time.start)); + + ControlFlow::Continue(value) } } diff --git a/vrp-core/src/construction/enablers/schedule_update.rs b/vrp-core/src/construction/enablers/schedule_update.rs index 2443840a4..a64770795 100644 --- a/vrp-core/src/construction/enablers/schedule_update.rs +++ b/vrp-core/src/construction/enablers/schedule_update.rs @@ -3,6 +3,7 @@ use crate::models::OP_START_MSG; use crate::models::common::{Distance, Duration, Schedule, Timestamp}; use crate::models::problem::{ActivityCost, TransportCost, TravelTime}; use rosomaxa::prelude::Float; +use rosomaxa::utils::UnwrapValue; custom_activity_state!(pub(crate) LatestArrival typeof Timestamp); custom_activity_state!(pub(crate) WaitingTime typeof Timestamp); @@ -41,7 +42,7 @@ fn update_schedules(route_ctx: &mut RouteContext, activity: &dyn ActivityCost, t let a = route_ctx.route().tour.get(activity_idx).unwrap(); let location = a.place.location; let arrival = dep + transport.duration(route_ctx.route(), loc, location, TravelTime::Departure(dep)); - let departure = activity.estimate_departure(route_ctx.route(), a, arrival); + let departure = activity.estimate_departure(route_ctx.route(), a, arrival).unwrap_value(); (location, arrival, departure) }; @@ -83,7 +84,7 @@ fn update_states(route_ctx: &mut RouteContext, activity: &dyn ActivityCost, tran } else { let latest_departure = end_time - transport.duration(route, act.place.location, prev_loc, TravelTime::Arrival(end_time)); - activity.estimate_arrival(route, act, latest_departure) + activity.estimate_arrival(route, act, latest_departure).unwrap_value() }; let future_waiting = waiting + (act.place.time.start - act.schedule.arrival).max(0.); diff --git a/vrp-core/src/construction/features/fast_service.rs b/vrp-core/src/construction/features/fast_service.rs index f80e1213c..2e8934bd9 100644 --- a/vrp-core/src/construction/features/fast_service.rs +++ b/vrp-core/src/construction/features/fast_service.rs @@ -4,6 +4,7 @@ mod fast_service_test; use super::*; use crate::construction::enablers::{calculate_travel, calculate_travel_delta}; +use rosomaxa::utils::UnwrapValue; use std::collections::HashMap; /// Provides the way to build fast service feature. 
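// A minimal sketch (hypothetical caller, not taken from this patch series) of how the two
// `ControlFlow` patterns above are consumed: relaxed paths accept either variant via
// rosomaxa's `UnwrapValue`, while hard feasibility checks pattern-match and turn
// `Break` into a constraint violation:
//
//     use std::ops::ControlFlow;
//
//     fn departure_or_violation(estimate: ControlFlow<f64, f64>) -> Result<f64, &'static str> {
//         match estimate {
//             // feasible: continue scheduling with the estimated timestamp
//             ControlFlow::Continue(timestamp) => Ok(timestamp),
//             // infeasible (e.g. activity time window exceeded): report a violation
//             ControlFlow::Break(_) => Err("time window violation"),
//         }
//     }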
@@ -189,7 +190,7 @@ impl FastServiceObjective { let (_, (prev_to_tar_dur, _)) = calculate_travel(route_ctx, activity_ctx, self.transport.as_ref()); let arrival = activity_ctx.prev.schedule.departure + prev_to_tar_dur; - self.activity.estimate_departure(route_ctx.route(), activity_ctx.target, arrival) + self.activity.estimate_departure(route_ctx.route(), activity_ctx.target, arrival).unwrap_value() } fn get_cost_for_multi_job(&self, route_ctx: &RouteContext, activity_ctx: &ActivityContext) -> Cost { diff --git a/vrp-core/src/construction/features/transport.rs b/vrp-core/src/construction/features/transport.rs index 39b6291e5..0aa328cac 100644 --- a/vrp-core/src/construction/features/transport.rs +++ b/vrp-core/src/construction/features/transport.rs @@ -4,11 +4,14 @@ #[path = "../../../tests/unit/construction/features/transport_test.rs"] mod transport_test; +use std::ops::ControlFlow; + use super::*; use crate::construction::enablers::*; use crate::models::common::Timestamp; use crate::models::problem::{ActivityCost, Single, TransportCost, TravelTime}; use crate::models::solution::Activity; +use rosomaxa::utils::UnwrapValue; // TODO // remove get_total_cost, get_route_costs, get_max_cost methods from contexts @@ -226,8 +229,12 @@ impl TransportConstraint { TravelTime::Arrival(latest_arr_time_at_next), ); - let latest_arr_time_at_target = - target.place.time.end.min(self.activity.estimate_arrival(route, target, latest_departure_at_target)); + let ControlFlow::Continue(latest_arr_time_at_target) = + self.activity.estimate_arrival(route, target, latest_departure_at_target) + else { + return ConstraintViolation::skip(self.time_window_code); + }; + let latest_arr_time_at_target = target.place.time.end.min(latest_arr_time_at_target); if arr_time_at_target > latest_arr_time_at_target { return ConstraintViolation::skip(self.time_window_code); @@ -237,7 +244,11 @@ impl TransportConstraint { return ConstraintViolation::success(); } - let end_time_at_target = self.activity.estimate_departure(route, target, arr_time_at_target); + let ControlFlow::Continue(end_time_at_target) = + self.activity.estimate_departure(route, target, arr_time_at_target) + else { + return ConstraintViolation::skip(self.time_window_code); + }; let arr_time_at_next = end_time_at_target + self.transport.duration( @@ -335,7 +346,7 @@ where let (prev_target, dep_time_target) = { let time = activity_ctx.prev.schedule.departure; let arrival = time + transport.duration(route, prev, target, prev_dep); - let departure = activity.estimate_departure(route, activity_ctx.target, arrival); + let departure = activity.estimate_departure(route, activity_ctx.target, arrival).unwrap_value(); (estimate_fn(prev, target, prev_dep), departure) }; @@ -429,7 +440,7 @@ impl CostObjective { let arrival = time + self.transport.duration(route, start.place.location, end.place.location, TravelTime::Departure(time)); - let departure = self.activity.estimate_departure(route, end, arrival); + let departure = self.activity.estimate_departure(route, end, arrival).unwrap_value(); let transport_cost = self.transport.cost(route, start.place.location, end.place.location, TravelTime::Departure(time)); diff --git a/vrp-core/src/models/problem/costs.rs b/vrp-core/src/models/problem/costs.rs index c6c032081..2c515dd29 100644 --- a/vrp-core/src/models/problem/costs.rs +++ b/vrp-core/src/models/problem/costs.rs @@ -7,6 +7,7 @@ use crate::models::solution::{Activity, Route}; use rosomaxa::prelude::{Float, GenericError, GenericResult}; use rosomaxa::utils::CollectGroupBy; use 
std::collections::HashMap; +use std::ops::ControlFlow; use std::sync::Arc; /// Specifies a travel time type. @@ -32,10 +33,14 @@ pub trait ActivityCost: Send + Sync { } /// Estimates departure time for activity and actor at given arrival time. - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp; + /// Returns `ControlFlow::Continue(timestamp)` if the departure time is feasible, + /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow; /// Estimates arrival time for activity and actor at given departure time. - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp; + /// Returns `ControlFlow::Continue(timestamp)` if the arrival time is feasible, + /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow; } /// An actor independent activity costs. @@ -43,12 +48,12 @@ pub trait ActivityCost: Send + Sync { pub struct SimpleActivityCost {} impl ActivityCost for SimpleActivityCost { - fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { - arrival.max(activity.place.time.start) + activity.place.duration + fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + ControlFlow::Continue(arrival.max(activity.place.time.start) + activity.place.duration) } - fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { - activity.place.time.end.min(departure - activity.place.duration) + fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + ControlFlow::Continue(activity.place.time.end.min(departure - activity.place.duration)) } } diff --git a/vrp-core/tests/helpers/models/problem/costs.rs b/vrp-core/tests/helpers/models/problem/costs.rs index 7873343f3..31df10824 100644 --- a/vrp-core/tests/helpers/models/problem/costs.rs +++ b/vrp-core/tests/helpers/models/problem/costs.rs @@ -2,6 +2,7 @@ use crate::models::common::{Distance, Duration, Location, Profile, Timestamp}; use crate::models::problem::{ActivityCost, SimpleActivityCost, TransportCost, TravelTime}; use crate::models::solution::{Activity, Route}; use rosomaxa::prelude::Float; +use std::ops::ControlFlow; use std::sync::Arc; #[derive(Default)] @@ -45,11 +46,11 @@ pub struct TestActivityCost { } impl ActivityCost for TestActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs b/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs index 2efadcbc6..f844293d2 100644 --- a/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs +++ b/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs @@ -7,6 +7,7 @@ use 
crate::helpers::models::problem::*; use crate::helpers::models::solution::*; use crate::models::problem::*; use crate::models::{Feature, ViolationCode}; +use rosomaxa::prelude::Float; const VIOLATION_CODE: ViolationCode = ViolationCode(1); diff --git a/vrp-pragmatic/tests/features/breaks/required_break.rs b/vrp-pragmatic/tests/features/breaks/required_break.rs index d8e52d3a3..1219ef2ac 100644 --- a/vrp-pragmatic/tests/features/breaks/required_break.rs +++ b/vrp-pragmatic/tests/features/breaks/required_break.rs @@ -326,8 +326,11 @@ fn can_handle_required_break_with_infeasible_sequence_relation() { error_codes: None, }; - let solution = solve_with_metaheuristic(problem, Some(vec![matrix])); + let solution = solve_with_metaheuristic_and_iterations_without_check(problem, Some(vec![matrix]), 200); - // Basic assertion - no crash, solution should exist and have at least one tour + // With proper constraint handling via ControlFlow, job "0" (with very wide time window) cannot be + // scheduled due to the required break at specific time conflicting with the strict sequence relation. assert!(!solution.tours.is_empty()); + assert_eq!(solution.unassigned.as_ref().map(|u| u.len()), Some(1)); + assert_eq!(solution.unassigned.as_ref().and_then(|u| u.first()).map(|j| j.job_id.as_str()), Some("0")); } From ca239990db19d0b41c6da3c09cf2d35cb44d053c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 13:05:11 +0100 Subject: [PATCH 04/30] Improve format_time error handling --- vrp-pragmatic/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vrp-pragmatic/src/lib.rs b/vrp-pragmatic/src/lib.rs index 56e6fe63f..a3b6160bf 100644 --- a/vrp-pragmatic/src/lib.rs +++ b/vrp-pragmatic/src/lib.rs @@ -47,8 +47,10 @@ pub fn get_unique_locations(problem: &Problem) -> Vec { } fn format_time(time: Float) -> String { - // TODO avoid using implicitly unwrap - OffsetDateTime::from_unix_timestamp(time as i64).map(|time| time.format(&Rfc3339).unwrap()).unwrap() + OffsetDateTime::from_unix_timestamp(time as i64) + .map_err(|err| format!("Invalid timestamp {}: {}", time, err)) + .and_then(|time| time.format(&Rfc3339).map_err(|err| format!("Format error: {}", err))) + .unwrap() } fn parse_time(time: &str) -> Float { From 12e66bf9feab93806a040b48534a985ec2fd17c3 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 26 Dec 2025 12:34:31 +0100 Subject: [PATCH 05/30] Update to rust 1.92 --- Dockerfile | 4 +-- vrp-cli/tests/unit/commands/solve_test.rs | 2 +- .../enablers/only_vehicle_activity_cost.rs | 14 ++++++++-- .../construction/enablers/reserved_time.rs | 14 ++++++++-- vrp-core/src/models/problem/costs.rs | 28 ++++++++++++++++--- .../tests/helpers/models/problem/costs.rs | 14 ++++++++-- vrp-pragmatic/src/format/solution/mod.rs | 8 ++---- 7 files changed, 65 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index a375e5c92..3753e06b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.88-alpine AS Builder +FROM rust:1.92-alpine AS builder LABEL maintainer="Ilya Builuk " \ org.opencontainers.image.title="A Vehicle Routing Problem solver CLI" \ @@ -29,6 +29,6 @@ FROM alpine:3.18 ENV SOLVER_DIR=/solver RUN mkdir $SOLVER_DIR -COPY --from=Builder /src/target/release/vrp-cli $SOLVER_DIR/vrp-cli +COPY --from=builder /src/target/release/vrp-cli $SOLVER_DIR/vrp-cli WORKDIR $SOLVER_DIR diff --git a/vrp-cli/tests/unit/commands/solve_test.rs b/vrp-cli/tests/unit/commands/solve_test.rs index 68cf9fa40..b350cbb66 100644 --- 
a/vrp-cli/tests/unit/commands/solve_test.rs +++ b/vrp-cli/tests/unit/commands/solve_test.rs @@ -161,7 +161,7 @@ fn can_use_init_size() { #[test] fn can_specify_cv() { - for (params, result) in vec![ + for (params, result) in [ (vec!["--min-cv", "sample,200,0.05,true"], Ok(Some(("sample".to_string(), 200, 0.05, true)))), (vec!["--min-cv", "period,100,0.01,false"], Ok(Some(("period".to_string(), 100, 0.01, false)))), (vec!["--min-cv", "sample,200,0,tru"], Err("cannot parse min_cv parameter".into())), diff --git a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs index 9e884c65d..87f4f4ca4 100644 --- a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs +++ b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs @@ -20,11 +20,21 @@ impl ActivityCost for OnlyVehicleActivityCost { waiting * actor.vehicle.costs.per_waiting_time + service * actor.vehicle.costs.per_service_time } - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/src/construction/enablers/reserved_time.rs b/vrp-core/src/construction/enablers/reserved_time.rs index dc7eb0613..7e918e231 100644 --- a/vrp-core/src/construction/enablers/reserved_time.rs +++ b/vrp-core/src/construction/enablers/reserved_time.rs @@ -55,7 +55,12 @@ impl DynamicActivityCost { } impl ActivityCost for DynamicActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { let activity_start = arrival.max(activity.place.time.start); let departure = activity_start + activity.place.duration; let schedule = TimeWindow::new(arrival, departure); @@ -89,7 +94,12 @@ impl ActivityCost for DynamicActivityCost { }) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { let arrival = activity.place.time.end.min(departure - activity.place.duration); let schedule = TimeWindow::new(arrival, departure); diff --git a/vrp-core/src/models/problem/costs.rs b/vrp-core/src/models/problem/costs.rs index 2c515dd29..2fe526c4e 100644 --- a/vrp-core/src/models/problem/costs.rs +++ b/vrp-core/src/models/problem/costs.rs @@ -35,12 +35,22 @@ pub trait ActivityCost: Send + Sync { /// Estimates departure time for activity and actor at given arrival time. /// Returns `ControlFlow::Continue(timestamp)` if the departure time is feasible, /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow; + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow; /// Estimates arrival time for activity and actor at given departure time. 
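    /// (Inverse of `estimate_departure`: given the departure time, it derives the latest feasible arrival.)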
/// Returns `ControlFlow::Continue(timestamp)` if the arrival time is feasible, /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow; + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow; } /// An actor independent activity costs. @@ -48,11 +58,21 @@ pub trait ActivityCost: Send + Sync { pub struct SimpleActivityCost {} impl ActivityCost for SimpleActivityCost { - fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + _: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { ControlFlow::Continue(arrival.max(activity.place.time.start) + activity.place.duration) } - fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + _: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { ControlFlow::Continue(activity.place.time.end.min(departure - activity.place.duration)) } } diff --git a/vrp-core/tests/helpers/models/problem/costs.rs b/vrp-core/tests/helpers/models/problem/costs.rs index 31df10824..4b91ff86b 100644 --- a/vrp-core/tests/helpers/models/problem/costs.rs +++ b/vrp-core/tests/helpers/models/problem/costs.rs @@ -46,11 +46,21 @@ pub struct TestActivityCost { } impl ActivityCost for TestActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-pragmatic/src/format/solution/mod.rs b/vrp-pragmatic/src/format/solution/mod.rs index fae7280fa..825195cab 100644 --- a/vrp-pragmatic/src/format/solution/mod.rs +++ b/vrp-pragmatic/src/format/solution/mod.rs @@ -38,8 +38,10 @@ type DomainLocation = vrp_core::models::common::Location; type DomainExtras = vrp_core::models::Extras; /// Specifies possible options for solution output. +#[derive(Default)] pub enum PragmaticOutputType { /// Only pragmatic is needed. + #[default] OnlyPragmatic, /// Only geojson is needed. OnlyGeoJson, @@ -47,12 +49,6 @@ pub enum PragmaticOutputType { Combined, } -impl Default for PragmaticOutputType { - fn default() -> Self { - Self::OnlyPragmatic - } -} - /// Writes solution in pragmatic format variation defined by output type argument. pub fn write_pragmatic( problem: &DomainProblem, From 7b86780237f9a0106026d18b982b71375fc41095 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 00:05:09 +0100 Subject: [PATCH 06/30] Improve SISR implementation --- CHANGELOG.md | 1 + .../search/recreate/recreate_with_blinks.rs | 312 +++++++++++++++--- 2 files changed, 261 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e053fac4..a5b330fad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ are already published. So, I stick to it for now. 
* improved remedian algorithm * separate calculations for distance/duration from cost minimization * change GSOM distance function +* improved SISR implementation ### Added diff --git a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs index 961233634..286a551b9 100644 --- a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs +++ b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs @@ -1,26 +1,179 @@ -use crate::construction::heuristics::InsertionContext; use crate::construction::heuristics::*; +use crate::construction::heuristics::{InsertionContext, JobSelector}; use crate::models::Problem; -use crate::models::common::*; use crate::models::problem::Job; +use crate::prelude::Cost; use crate::solver::RefinementContext; use crate::solver::search::recreate::Recreate; use rosomaxa::prelude::*; +use rosomaxa::utils::fold_reduce; use std::sync::Arc; -struct ChunkJobSelector { - size: usize, +/// A recreate method as described in "Slack Induction by String Removals for +/// Vehicle Routing Problems" (aka SISR) paper by Jan Christiaens, Greet Vanden Berghe. +pub struct RecreateWithBlinks { + job_selectors: Vec>, + route_selector: Box, + leg_selection: LegSelection, + result_selector: Box, + insertion_heuristic: InsertionHeuristic, + weights: Vec, +} + +impl RecreateWithBlinks { + /// Creates a new instance of `RecreateWithBlinks`. + pub fn new(selectors: Vec<(Box, usize)>, blink_ratio: f64, random: Arc) -> Self { + let weights = selectors.iter().map(|(_, weight)| *weight).collect(); + let evaluator = Box::new(BlinkInsertionEvaluator::new(blink_ratio, random.clone())); + + Self { + job_selectors: selectors.into_iter().map(|(selector, _)| selector).collect(), + route_selector: Box::::default(), + leg_selection: LegSelection::Exhaustive, + result_selector: Box::::default(), + insertion_heuristic: InsertionHeuristic::new(evaluator), + weights, + } + } + + /// Creates a new instance with defaults compliant with SISR paper (Section 5.3 + Table 13). + pub fn new_with_defaults(random: Arc) -> Self { + let blink_ratio = 0.01; + + // Helper to wrap selectors with SingletonJobSelector + fn singleton(selector: T) -> Box { + Box::new(SingletonJobSelector(selector)) + } + + Self::new( + vec![ + // --- Core Selectors (Section 5.3) --- + // 1. Random (Weight: 4) + (singleton(AllJobSelector::default()), 4), + // 2. Demand: Largest First (Weight: 4) + (singleton(DemandJobSelector::new(true)), 4), + // 3. Far: Largest Distance First (Weight: 2) + (singleton(RankedJobSelector::new(true)), 2), + // 4. Close: Smallest Distance First (Weight: 1) + (singleton(RankedJobSelector::new(false)), 1), + // --- VRPTW Extensions (Table 13) --- + // 5. TW Length: Increasing (Shortest/Hardest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::LengthAscending)), 2), + // 6. TW Start: Increasing (Earliest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::StartAscending)), 2), + // 7. 
TW End: Decreasing (Latest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::EndDescending)), 2), + ], + blink_ratio, + random, + ) + } +} + +impl Recreate for RecreateWithBlinks { + fn run(&self, _: &RefinementContext, insertion_ctx: InsertionContext) -> InsertionContext { + let index = insertion_ctx.environment.random.weighted(self.weights.as_slice()); + let job_selector = self.job_selectors.get(index).unwrap().as_ref(); + + self.insertion_heuristic.process( + insertion_ctx, + job_selector, + self.route_selector.as_ref(), + &self.leg_selection, + self.result_selector.as_ref(), + ) + } +} + +struct BlinkInsertionEvaluator { + blink_ratio: f64, + random: Arc, } -impl ChunkJobSelector { - pub fn new(size: usize) -> Self { - Self { size } +impl BlinkInsertionEvaluator { + pub fn new(blink_ratio: f64, random: Arc) -> Self { + Self { blink_ratio, random } + } +} + +impl InsertionEvaluator for BlinkInsertionEvaluator { + fn evaluate_all( + &self, + insertion_ctx: &InsertionContext, + jobs: &[&Job], + routes: &[&RouteContext], + _leg_selection: &LegSelection, + result_selector: &dyn ResultSelector, + ) -> InsertionResult { + // With SingletonJobSelector, this is guaranteed to be the single target job. + let job = match jobs.first() { + Some(job) => job, + None => return InsertionResult::make_failure(), + }; + + let eval_ctx = EvaluationContext { + goal: &insertion_ctx.problem.goal, + job, + leg_selection: &LegSelection::Exhaustive, + result_selector, + }; + + let result = fold_reduce( + routes, + InsertionResult::make_failure, + |best_in_thread, route_ctx| { + let mut best_in_route = best_in_thread; + let tour = &route_ctx.route().tour; + + for leg_index in 0..tour.legs().count() { + // The Blink: "Each position is evaluated with a probability of 1 - gamma" + if self.random.is_hit(self.blink_ratio) { + continue; + } + + // Evaluate specific position + let result = eval_job_insertion_in_route( + insertion_ctx, + &eval_ctx, + route_ctx, + InsertionPosition::Concrete(leg_index), + best_in_route, + ); + + best_in_route = result; + } + + best_in_route + }, + // Reduce: Merge results from threads (Greedy/Best selection) + |a, b| InsertionResult::choose_best_result(a, b), + ); + + match result { + InsertionResult::Success(_) => result, + InsertionResult::Failure(mut failure) => { + // If we failed to insert, we must blame the specific job we were trying + // to insert. Otherwise, the heuristic thinks the issue is systemic (no routes) + // and might abort entirely. + if failure.job.is_none() { + failure.job = Some((*job).clone()); + } + InsertionResult::Failure(failure) + } + } } } -impl JobSelector for ChunkJobSelector { - fn select<'a>(&'a self, insertion_ctx: &'a InsertionContext) -> Box + 'a> { - Box::new(insertion_ctx.solution.required.iter().take(self.size)) +struct SingletonJobSelector(T); + +impl JobSelector for SingletonJobSelector { + fn prepare(&self, ctx: &mut InsertionContext) { + self.0.prepare(ctx); + } + + fn select<'a>(&'a self, ctx: &'a InsertionContext) -> Box + 'a> { + // Only yield the first job: this forces InsertionHeuristic to align with BlinkInsertionEvaluator. + Box::new(self.0.select(ctx).take(1)) } } @@ -59,56 +212,111 @@ impl JobSelector for RankedJobSelector { } } -/// A recreate method as described in "Slack Induction by String Removals for -/// Vehicle Routing Problems" (aka SISR) paper by Jan Christiaens, Greet Vanden Berghe. 
-pub struct RecreateWithBlinks {
-    job_selectors: Vec>,
-    route_selector: Box,
-    leg_selection: LegSelection,
-    result_selector: Box,
-    insertion_heuristic: InsertionHeuristic,
-    weights: Vec,
+struct DemandJobSelector {
+    asc_order: bool,
 }

-impl RecreateWithBlinks {
-    /// Creates a new instance of `RecreateWithBlinks`.
-    pub fn new(selectors: Vec<(Box, usize)>, random: Arc) -> Self {
-        let weights = selectors.iter().map(|(_, weight)| *weight).collect();
-        Self {
-            job_selectors: selectors.into_iter().map(|(selector, _)| selector).collect(),
-            route_selector: Box::::default(),
-            leg_selection: LegSelection::Stochastic(random.clone()),
-            result_selector: Box::new(BlinkResultSelector::new_with_defaults(random)),
-            insertion_heuristic: Default::default(),
-            weights,
+impl DemandJobSelector {
+    pub fn new(asc_order: bool) -> Self {
+        Self { asc_order }
+    }
+}
+
+impl JobSelector for DemandJobSelector {
+    fn prepare(&self, insertion_ctx: &mut InsertionContext) {
+        insertion_ctx.solution.required.sort_by(|a, b| {
+            use crate::construction::features::JobDemandDimension;
+            use crate::models::common::{MultiDimLoad, SingleDimLoad};
+
+            // Get total demand size (sum of all pickup and delivery components)
+            // Try SingleDimLoad first, then MultiDimLoad (only one type per problem)
+            let get_demand = |job: &Job| -> i32 {
+                let dimens = job.dimens();
+
+                if let Some(demand) = dimens.get_job_demand::() {
+                    demand.pickup.0.value + demand.pickup.1.value + demand.delivery.0.value + demand.delivery.1.value
+                } else if let Some(demand) = dimens.get_job_demand::() {
+                    let sum_load = |load: &MultiDimLoad| load.load[..load.size].iter().sum::();
+                    sum_load(&demand.pickup.0)
+                        + sum_load(&demand.pickup.1)
+                        + sum_load(&demand.delivery.0)
+                        + sum_load(&demand.delivery.1)
+                } else {
+                    0
+                }
+            };
+
+            get_demand(a).cmp(&get_demand(b))
+        });
+
+        if !self.asc_order {
+            insertion_ctx.solution.required.reverse();
+        }
+    }
+}

-    /// Creates a new instance of `RecreateWithBlinks` with default prameters.
-    pub fn new_with_defaults(random: Arc) -> Self {
-        Self::new(
-            vec![
-                (Box::::default(), 10),
-                (Box::new(ChunkJobSelector::new(8)), 10),
-                (Box::new(RankedJobSelector::new(true)), 5),
-                (Box::new(RankedJobSelector::new(false)), 1),
-            ],
-            random,
-        )
+/// Specifies the sorting criterion for Time Windows (Table 13 of SISR paper).
+pub enum TimeWindowSelectionMode {
+    /// Sort by time window duration (End - Start).
+    /// Paper: "Increasing time window length"
+    LengthAscending,
+
+    /// Sort by start time.
+    /// Paper: "Increasing time window start"
+    StartAscending,
+
+    /// Sort by end time.
+    /// Paper: "Decreasing time window end"
+    EndDescending,
+}
+
+pub struct TimeWindowJobSelector {
+    mode: TimeWindowSelectionMode,
+}
+
+impl TimeWindowJobSelector {
+    pub fn new(mode: TimeWindowSelectionMode) -> Self {
+        Self { mode }
+    }
+
+    /// Helper to extract the relevant time window metric from a job.
+    /// Returns (min_start, max_end, min_length).
+    /// Handles multiple time windows by finding the extreme boundaries.
+    fn get_tw_metric(job: &Job) -> (f64, f64, f64) {
+        job.places()
+            .flat_map(|place| place.times.iter())
+            // NOTE: ignore non-time-window spans; this is the simplest way to handle
+            // the case when multiple span types are mixed.
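+            // (jobs without any time window fall back to the permissive defaults in `unwrap_or` below)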
+ .filter_map(|time| time.as_time_window()) + .fold(None, |acc: Option<(f64, f64, f64)>, tw| { + let length = tw.end - tw.start; + Some(match acc { + None => (tw.start, tw.end, length), + Some((min_start, max_end, min_length)) => { + (min_start.min(tw.start), max_end.max(tw.end), min_length.min(length)) + } + }) + }) + .unwrap_or((0.0, f64::MAX, f64::MAX)) } } -impl Recreate for RecreateWithBlinks { - fn run(&self, _: &RefinementContext, insertion_ctx: InsertionContext) -> InsertionContext { - let index = insertion_ctx.environment.random.weighted(self.weights.as_slice()); - let job_selector = self.job_selectors.get(index).unwrap().as_ref(); +impl JobSelector for TimeWindowJobSelector { + fn prepare(&self, insertion_ctx: &mut InsertionContext) { + insertion_ctx.solution.required.sort_by(|a, b| { + let (start_a, end_a, len_a) = Self::get_tw_metric(a); + let (start_b, end_b, len_b) = Self::get_tw_metric(b); - self.insertion_heuristic.process( - insertion_ctx, - job_selector, - self.route_selector.as_ref(), - &self.leg_selection, - self.result_selector.as_ref(), - ) + match self.mode { + // Increasing Length: Shortest windows first (hardest to fit) + TimeWindowSelectionMode::LengthAscending => len_a.total_cmp(&len_b), + + // Increasing Start: Earliest start first (chronological) + TimeWindowSelectionMode::StartAscending => start_a.total_cmp(&start_b), + + // Decreasing End: Latest end first (reverse chronological/urgency) + TimeWindowSelectionMode::EndDescending => end_b.total_cmp(&end_a), + } + }); } } From 12422e085391f9c8ebdf837c7b45efb7720aac73 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 00:25:22 +0100 Subject: [PATCH 07/30] Increase SISR probability --- vrp-core/src/solver/heuristic.rs | 41 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index daf8648d0..0a0762b81 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -399,13 +399,13 @@ mod statik { // initialize recreate let recreate = Arc::new(WeightedRecreate::new(vec![ - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 50), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 50), + (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 20), (Arc::new(RecreateWithRegret::new(2, 3, random.clone())), 20), (Arc::new(RecreateWithCheapest::new(random.clone())), 20), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 10), (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 5), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 5), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 5), (Arc::new(RecreateWithFarthest::new(random.clone())), 2), (Arc::new(RecreateWithSkipBest::new(4, 8, random.clone())), 2), (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), 1), @@ -430,7 +430,7 @@ mod statik { let ruin = Arc::new(WeightedRuin::new(vec![ ( Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(normal_limits.clone())), 1.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(normal_limits.clone())), 2.), (extra_random_job.clone(), 0.1), ])), 100, @@ -492,22 +492,23 @@ mod statik { mod dynamic { use super::*; - fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String)> { + fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ - 
(cheapest.clone(), "cheapest".to_string()), - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string()), - (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string()), - (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string()), - (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string()), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string()), - (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string()), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string()), + (cheapest.clone(), "cheapest".to_string(), 1.), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 3.), + (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string(), 1.), + (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string(), 1.), + (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), + (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string(), 1.), + (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string(), 1.), + (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string(), 1.), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest.clone(), random.clone())), "skip_random".to_string(), + 1., ), - (Arc::new(RecreateWithSlice::new(random.clone())), "slice".to_string()), + (Arc::new(RecreateWithSlice::new(random.clone())), "slice".to_string(), 1.), ] .into_iter() .chain( @@ -516,14 +517,14 @@ mod dynamic { move || RecreateWithCheapest::new(random.clone()) }) .enumerate() - .map(|(idx, recreate)| (recreate, format!("alternative_{idx}"))), + .map(|(idx, recreate)| (recreate, format!("alternative_{idx}"), 1.)), ) .collect() } fn get_ruins_with_limits(limits: RemovalLimits, prefix: &str) -> Vec<(Arc, String, Float)> { vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 2.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 7.), (Arc::new(NeighbourRemoval::new(limits.clone())), format!("{prefix}_neighbour_removal"), 5.), (Arc::new(WorstJobRemoval::new(4, limits.clone())), format!("{prefix}_worst_job"), 4.), (Arc::new(RandomJobRemoval::new(limits.clone())), format!("{prefix}_random_job_removal"), 4.), @@ -619,12 +620,12 @@ mod dynamic { recreates .iter() - .flat_map(|(recreate, recreate_name)| { - ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, weight)| { + .flat_map(|(recreate, recreate_name, recreate_weight)| { + ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, ruin_weight)| { ( Arc::new(RuinAndRecreate::new(ruin.clone(), recreate.clone())), format!("{ruin_name}+{recreate_name}"), - *weight, + ruin_weight * recreate_weight, ) }) }) @@ -644,11 +645,11 @@ mod dynamic { let cheapest = Arc::new(RecreateWithCheapest::new(random.clone())); let recreate = Arc::new(WeightedRecreate::new(vec![ (cheapest.clone(), 1), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 3), (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 1), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 1), (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 1), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 1), - 
(Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 1), (Arc::new(RecreateWithFarthest::new(random.clone())), 1), (Arc::new(RecreateWithSlice::new(random.clone())), 1), (Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest, random.clone())), 1), @@ -661,7 +662,7 @@ mod dynamic { let ruin = Arc::new(WeightedRuin::new(vec![ ( Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 1.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 5.), (random_ruin.clone(), 0.1), ])), 1, From 10d46e919b46d88ed7f6ab892aa06890a3e0f1ca Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 19:19:58 +0100 Subject: [PATCH 08/30] Fix potential issue in lkh search --- vrp-core/src/algorithms/lkh/kopt.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vrp-core/src/algorithms/lkh/kopt.rs b/vrp-core/src/algorithms/lkh/kopt.rs index d95eb8ce0..db73b76fb 100644 --- a/vrp-core/src/algorithms/lkh/kopt.rs +++ b/vrp-core/src/algorithms/lkh/kopt.rs @@ -31,9 +31,17 @@ where /// Tries to optimize a given path using modified Lin-Kernighan-Helsgaun algorithm. /// Returns discovered solutions in the order of their improvement. pub fn optimize(mut self, path: Path) -> Vec { + // Scale with problem size: linear growth with safety cap to prevent infinite loops + let max_iterations = (path.len() * 10).min(2000); + self.solutions.push(path); + let mut iterations = 0; while let Some(improved_path) = self.solutions.last().and_then(|p| self.improve(p.iter().copied())) { + iterations += 1; + if iterations >= max_iterations { + break; + } self.solutions.clear(); self.solutions.push(improved_path); } From 5d3ce5cd38f4530a922fc05dc2820d59c40ca778 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 20:00:03 +0100 Subject: [PATCH 09/30] Apply some tweaks in dynamic heuristic --- rosomaxa/src/hyper/dynamic_selective.rs | 31 ++++++++++++------- .../unit/hyper/dynamic_selective_test.rs | 22 +++++-------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 0a911f77a..42fc3ac8c 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -159,7 +159,7 @@ where let duration = duration.as_millis() as usize; let base_reward = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - let reward_multiplier = estimate_reward_perf_multiplier(&context, duration, is_new_best); + let reward_multiplier = estimate_reward_perf_multiplier(&context, duration); let reward = base_reward * reward_multiplier; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; @@ -197,13 +197,24 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { + // Normalize weights to achievable reward scale to prevent optimistic bias locking + // Find max weight for scaling + let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); + + // Target max prior: rewards typically [0.75, 4.5], max theoretical ~9-10 + // 4.0 is optimistic but achievable for good operators + const TARGET_MAX_PRIOR: Float = 4.0; + let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; + let slot_machines = search_operators .into_iter() - .map(|(operator, name, _)| { - // TODO use initial weight as prior mean estimation? 
+ .map(|(operator, name, initial_weight)| { + // Scale weight to reward range while preserving hierarchy + // E.g., weight 21.0 -> 4.0, weight 2.0 -> 0.38 + let prior_mean = initial_weight * scale; ( SlotMachine::new( - 1., + prior_mean, SearchAction { operator, operator_name: name.to_string() }, DefaultDistributionSampler::new(environment.random.clone()), ), @@ -363,11 +374,7 @@ where /// Estimates performance of used operation based on its duration and overall improvement statistics. /// Returns a reward multiplier in `(~0.5, 3]` range. -fn estimate_reward_perf_multiplier( - search_ctx: &SearchContext, - duration: usize, - has_improvement: bool, -) -> Float +fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, O: HeuristicObjective, @@ -385,11 +392,11 @@ where _ => 1., // Moderato }; - let improvement_ratio = match (improvement_ratio, has_improvement) { + let improvement_ratio = match improvement_ratio { // stagnation: increase reward - (ratio, true) if ratio < 0.05 => 2., + ratio if ratio < 0.05 => 2., // fast convergence: decrease reward - (ratio, true) if ratio > 0.150 => 0.75, + ratio if ratio > 0.150 => 0.75, // moderate convergence _ => 1., }; diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 66285b424..7d4958e89 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -56,24 +56,18 @@ fn can_estimate_median() { assert!(median > 0); } -parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, has_improvement, expected), { - can_estimate_reward_multiplier_impl(approx_median, duration, has_improvement, expected); +parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, expected), { + can_estimate_reward_multiplier_impl(approx_median, duration, expected); }} can_estimate_reward_multiplier! 
{ - case_01_moderate: (Some(1), 1, false, 1.), - case_02_allegro: (Some(2), 1, false, 1.5), - case_03_allegretto: (Some(10), 8, false, 1.25), - case_04_andante: (Some(8), 13, false, 0.75), - case_05_moderato_improvement: (Some(1), 1, true, 2.), + case_01_moderate: (Some(1), 1, 2.0), // 1.0 (moderate) * 2.0 (stagnation bonus) + case_02_allegro: (Some(2), 1, 3.0), // 1.5 (fast) * 2.0 (stagnation bonus) + case_03_allegretto: (Some(10), 8, 2.5), // 1.25 (fast-ish) * 2.0 (stagnation bonus) + case_04_andante: (Some(8), 13, 1.5), // 0.75 (slow) * 2.0 (stagnation bonus) } -fn can_estimate_reward_multiplier_impl( - approx_median: Option, - duration: usize, - has_improvement: bool, - expected: Float, -) { +fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: usize, expected: Float) { let heuristic_ctx = create_default_heuristic_context(); let solution = VectorSolution::new(vec![], 0., vec![]); let search_ctx = SearchContext { @@ -84,7 +78,7 @@ fn can_estimate_reward_multiplier_impl( approx_median, }; - let result = estimate_reward_perf_multiplier(&search_ctx, duration, has_improvement); + let result = estimate_reward_perf_multiplier(&search_ctx, duration); assert_eq!(result, expected); } From 3c268c6dce899635568f1e57f3ce795b17c5602b Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 23:39:33 +0100 Subject: [PATCH 10/30] Fix clippy warning --- vrp-core/src/solver/search/recreate/recreate_with_blinks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs index 286a551b9..078cb8d0b 100644 --- a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs +++ b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs @@ -146,7 +146,7 @@ impl InsertionEvaluator for BlinkInsertionEvaluator { best_in_route }, // Reduce: Merge results from threads (Greedy/Best selection) - |a, b| InsertionResult::choose_best_result(a, b), + InsertionResult::choose_best_result, ); match result { From afb26d501a3170c890112ada04a79fef1d7a00c7 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 00:41:25 +0100 Subject: [PATCH 11/30] Redesign dynamic selective heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 120 +++++++--- rosomaxa/src/hyper/dynamic_selective.rs | 217 ++++++++++++------ .../unit/hyper/dynamic_selective_test.rs | 16 +- 3 files changed, 252 insertions(+), 101 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index 2eca72c89..a3cac2732 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -4,11 +4,11 @@ mod slot_machine_test; use crate::utils::{DistributionSampler, Float}; -/// Represents an action on slot machine; +/// Represents an action on slot machine. pub trait SlotAction { /// An environment context. type Context; - /// A feedback from taking slot action. + /// A feedback from taking slot action. type Feedback: SlotFeedback; /// Take an action for given context and return reward. @@ -21,23 +21,27 @@ pub trait SlotFeedback { fn reward(&self) -> Float; } -/// Simulates a slot machine. -/// Internally tries to estimate reward probability distribution using one of methods from Thompson sampling. +/// Simulates a slot machine using Non-Stationary Thompson Sampling. 
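+/// (Each slot machine wraps a single search operator; the dynamic selective
+/// hyper-heuristic allocates trials among them.)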
+/// +/// This implementation uses a Normal-Inverse-Gamma (NIG) conjugate prior to model +/// the unknown mean and variance of the reward distribution. It employs exponential +/// decay (weighted likelihood) to handle non-stationary environments where the +/// effectiveness of operators changes over time (e.g., VRP search phases). #[derive(Clone)] pub struct SlotMachine { - /// The number of times this slot machine has been tried. + /// The number of times this slot machine has been used (telemetry only). n: usize, - /// Gamma shape parameter. + /// Shape parameter (α) of the Inverse-Gamma distribution (tracks sample count/confidence). alpha: Float, - /// Gamma rate parameter. + /// Rate parameter (β) of the Inverse-Gamma distribution (tracks sum of squared errors). beta: Float, - /// Estimated mean. + /// Estimated mean (μ) of the Normal distribution. mu: Float, - /// Estimated variance. + /// Estimated variance (E[σ²]) derived from α and β. v: Float, - /// Sampler: used to provide samples from underlying estimated distribution. + /// Sampler used to draw values from the estimated distribution. sampler: S, - /// Actual slot action function. + /// The actual action associated with this slot. action: A, } @@ -46,47 +50,109 @@ where A: SlotAction + Clone, S: DistributionSampler + Clone, { - /// Creates a new instance of `SlotMachine`. + /// Creates a new instance with Universal Priors. + /// + /// Since the `SearchAgent` externally normalizes rewards to standard scores (Z-scores), + /// we can use universal constants for the priors rather than problem-specific guesses. pub fn new(prior_mean: Float, action: A, sampler: S) -> Self { - let alpha = 1.; - let beta = 10.; - let mu = prior_mean; - let v = beta / (alpha + 1.); + // Universal priors for a Standard Normal distribution N(0, 1): + // Alpha = 2.0 implies a weak prior belief with mathematically defined variance. + // Beta = 1.0 combined with Alpha=2.0 implies an expected variance of ~1.0. + let alpha = 2.0; + let beta = 1.0; + + // Prior mean is clamped to a reasonable Z-score range (-1 to 1) to prevent + // initialization bias, though typically 0.0 is used for centered data. + let mu = prior_mean.min(1.0).max(-1.0); + + // Variance expectation for Inverse-Gamma: Beta / (Alpha - 1) + // With Alpha=2.0, this results in v=1.0. + let v = beta / (alpha - 1.0); Self { n: 0, alpha, beta, mu, v, action, sampler } } - /// Samples from estimated normal distribution. + /// Samples a reward prediction from the estimated Normal-Inverse-Gamma distribution. + /// + /// 1. Samples precision (τ) from Gamma(α, β). + /// 2. Samples reward from Normal(μ, 1/√(τ)). pub fn sample(&self) -> Float { + // Sample precision from Gamma distribution let precision = self.sampler.gamma(self.alpha, 1. / self.beta); + + // Safety: If precision is numerically zero (rare), fallback to high variance let precision = if precision == 0. || self.n == 0 { 0.001 } else { precision }; let variance = 1. / precision; self.sampler.normal(self.mu, variance.sqrt()) } - /// Plays a game by taking action within given context. As result, updates a slot state. + /// Plays the slot machine by executing the action within the given context. pub fn play(&self, context: A::Context) -> A::Feedback { self.action.take(context) } - /// Updates slot machine. + /// Updates the internal Bayesian state with a new reward observation. + /// + /// The update logic performs two key functions: + /// 1. **Decay:** Forgets old observations to adapt to the changing search landscape. + /// 2. 
**Bayesian Update:** Refines estimates of Mean and Variance using the new data. + /// + /// `reward` is expected to be a normalized relative value (e.g., success ≈ 1.0, failure = 0.0). pub fn update(&mut self, feedback: &A::Feedback) { let reward = feedback.reward(); - let n = 1.; - let v = self.n as Float; + // --- 1. Memory Decay (Non-Stationarity) --- - self.alpha += n / 2.; - self.beta += (n * v / (v + n)) * (reward - self.mu).powi(2) / 2.; + // A decay factor of 0.999 implies a "memory horizon" of ~1000 samples. + // This is crucial for VRP where improvements are rare (sparse signals). + // It provides enough patience to wait for "lottery ticket" wins while still + // allowing the agent to abandon operators that stop working in later phases. + const DECAY_FACTOR: Float = 0.999; - // estimate the variance: calculate running mean from the gamma hyper-parameters - self.v = self.beta / (self.alpha + 1.); + // Decay the sufficient statistics. + // CRITICAL: We clamp alpha to >= 2.0. The variance of the Inverse-Gamma + // distribution is defined as Beta / (Alpha - 1). If Alpha <= 1, variance is undefined. + // Keeping Alpha >= 2.0 ensures numerical stability and prevents division by zero. + self.alpha = (self.alpha * DECAY_FACTOR).max(2.0); + self.beta *= DECAY_FACTOR; + + // Increment usage counter (purely for human telemetry/diagnostics). + // We do not decay this value so we can track total lifetime usage. self.n += 1; - self.mu += (reward - self.mu) / self.n as Float; + + // --- 2. Bayesian Update (Normal-Gamma) --- + + // Standard update adds 0.5 to Alpha for each new observation n=1. + self.alpha += 0.5; + let old_mu = self.mu; + + // Calculate Effective N derived from shape parameter. + // In Normal-Gamma, Alpha grows by 0.5 per sample, so N ~ 2 * Alpha. + // This avoids maintaining a separate floating-point 'n' variable for the math. + let effective_n = self.alpha * 2.0; + + // Update Mean (Mu) + // Uses linear interpolation based on the effective sample size. + let learning_rate = 1.0 / effective_n; + self.mu += learning_rate * (reward - self.mu); + + // Update Variance (Beta) + // This is the Bayesian adaptation of Welford's online variance algorithm. + // It incrementally updates the sum of squared errors. + // The term `effective_n / (effective_n + 1.0)` weights the new sample's + // contribution to the variance relative to prior knowledge. + self.beta += 0.5 * (reward - old_mu).powi(2) * effective_n / (effective_n + 1.0); + + // --- 3. Variance Estimation --- + + // Calculate expected variance E[σ²] = Beta / (Alpha - 1). + // Since we enforced Alpha >= 2.0 (decayed) + 0.5 (update) = 2.5, + // the denominator is guaranteed to be >= 1.5. Safe division. + self.v = self.beta / (self.alpha - 1.0); } - /// Gets learned params (alpha, beta, mean and variants) and usage amount. + /// Gets learned params (alpha, beta, mean, variance) and usage amount. 
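
    // Worked trace of the decayed update above, assuming a hypothetical stream of
    // normalized rewards equal to 1.5 and the universal priors (alpha=2.0, beta=1.0, mu=0.0):
    //   step 1: decay -> alpha=2.0 (clamped), beta=0.999; update -> alpha=2.5,
    //           effective_n=5.0, mu = 0.0 + (1.5 - 0.0)/5.0 = 0.30,
    //           beta ≈ 0.999 + 0.5 * 1.5^2 * 5/6 ≈ 1.937, hence v ≈ 1.29
    //   step 2: decay -> alpha≈2.4975, beta≈1.935; update -> alpha≈2.9975,
    //           effective_n≈5.995, mu ≈ 0.30 + (1.5 - 0.30)/5.995 ≈ 0.50
    // Repeated wins keep pulling mu toward 1.5, while alpha saturates at the fixed
    // point of a = a*0.999 + 0.5, i.e. 500, matching the ~1000-sample memory horizon.
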
pub fn get_params(&self) -> (Float, Float, Float, Float, usize) { (self.alpha, self.beta, self.mu, self.v, self.n) } diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 42fc3ac8c..2d28648d9 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -51,7 +51,7 @@ where self.agent.update(generation, &feedback); - vec![feedback.solution] + feedback.solution.into_iter().collect() } fn search_many(&mut self, heuristic_ctx: &Self::Context, solutions: Vec<&Self::Solution>) -> Vec { @@ -69,7 +69,7 @@ where self.agent.save_params(generation); - feedbacks.into_iter().map(|feedback| feedback.solution).collect() + feedbacks.into_iter().filter_map(|feedback| feedback.solution).collect() } fn diversify(&self, heuristic_ctx: &Self::Context, solution: &Self::Solution) -> Vec { @@ -102,11 +102,15 @@ where } } -type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; +/// Type alias for slot machines used in Thompson sampling +pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; -#[derive(PartialEq, Eq, Hash, Clone)] -enum SearchState { +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +/// Search state for Thompson sampling +pub enum SearchState { + /// Best known solution state BestKnown, + /// Diverse solution state Diverse, } @@ -119,10 +123,11 @@ impl Display for SearchState { } } -struct SearchFeedback { +/// Search feedback result for Thompson sampling +pub struct SearchFeedback { sample: SearchSample, slot_idx: usize, - solution: S, + solution: Option, } impl SlotFeedback for SearchFeedback { @@ -131,7 +136,8 @@ impl SlotFeedback for SearchFeedback { } } -struct SearchAction<'a, C, O, S> { +/// Search action wrapper for Thompson sampling +pub struct SearchAction<'a, C, O, S> { operator: Arc + Send + Sync + 'a>, operator_name: String, } @@ -167,11 +173,12 @@ where let sample = SearchSample { name: self.operator_name.clone(), duration, reward, transition }; - SearchFeedback { sample, slot_idx: context.slot_idx, solution: new_solution } + SearchFeedback { sample, slot_idx: context.slot_idx, solution: Some(new_solution) } } } -struct SearchContext<'a, C, O, S> +/// Search context for Thompson sampling +pub struct SearchContext<'a, C, O, S> where C: HeuristicContext, O: HeuristicObjective, @@ -185,7 +192,10 @@ where } struct SearchAgent<'a, C, O, S> { + // Separate learning contexts for different search phases slot_machines: HashMap>, + // Shared scale invariance (universal physics) + signal_stats: SignalStats, tracker: HeuristicTracker, random: Arc, } @@ -197,56 +207,52 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Normalize weights to achievable reward scale to prevent optimistic bias locking - // Find max weight for scaling + // Calculate scaling factor to normalize operator weights to target range let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); - - // Target max prior: rewards typically [0.75, 4.5], max theoretical ~9-10 - // 4.0 is optimistic but achievable for good operators const TARGET_MAX_PRIOR: Float = 4.0; let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; - let slot_machines = search_operators - .into_iter() - .map(|(operator, name, initial_weight)| { - // Scale weight to reward range while preserving hierarchy - // E.g., weight 21.0 -> 4.0, weight 2.0 -> 0.38 - 
let prior_mean = initial_weight * scale; - ( - SlotMachine::new( - prior_mean, - SearchAction { operator, operator_name: name.to_string() }, - DefaultDistributionSampler::new(environment.random.clone()), - ), - name, - ) - }) - .collect::>(); + // Factory function to create identical slot configurations for each state + let create_slots = || { + search_operators + .iter() + .map(|(operator, name, initial_weight)| { + let prior_mean = initial_weight * scale; + ( + SlotMachine::new( + prior_mean, + SearchAction { operator: operator.clone(), operator_name: name.to_string() }, + DefaultDistributionSampler::new(environment.random.clone()), + ), + name.clone(), + ) + }) + .collect::>() + }; - let slot_machines = once((SearchState::BestKnown, slot_machines.clone())) - .chain(once((SearchState::Diverse, slot_machines))) + // Initialize separate states with identical priors but independent learning + let slot_machines = once((SearchState::BestKnown, create_slots())) + .chain(once((SearchState::Diverse, create_slots()))) .collect(); Self { slot_machines, - tracker: HeuristicTracker { - total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), - search_telemetry: Default::default(), - heuristic_telemetry: Default::default(), - is_experimental: environment.is_experimental, - }, + signal_stats: SignalStats::new(), + tracker: HeuristicTracker::new(environment.is_experimental), random: environment.random.clone(), } } /// Picks relevant search operator based on learnings and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { + // Determine search context - critical for operator selection let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { SearchState::BestKnown } else { SearchState::Diverse }; + // Get contextually appropriate slot machines let (slot_idx, slot_machine) = self .slot_machines .get(&from) @@ -258,18 +264,45 @@ where let approx_median = self.tracker.approx_median(); + // Execute with full context information slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } - /// Updates estimations based on search feedback. + /// Updates estimations based on search feedback with protected signal baseline. pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { - let from = &feedback.sample.transition.0; - - if let Some(slots) = self.slot_machines.get_mut(from) { - slots[feedback.slot_idx].0.update(feedback); + let raw_reward = feedback.sample.reward; + let current_scale = self.signal_stats.scale(); + + // 1. Update Shared Signal Baseline (Protected) + // Clamp observation to 3x current scale to prevent a single + // "Best Known Jackpot" from disrupting baseline for Diverse state + if raw_reward > f64::EPSILON { + self.signal_stats.update(raw_reward.min(current_scale * 3.0)); } - self.tracker.observe_sample(generation, feedback.sample.clone()) + // 2. Normalize for Contextual Learning (Uncapped) + // If we hit a jackpot (e.g., reward 60.0 vs scale 1.0), we pass the full 60.0. 
+ // This ensures the specific slot machine learns "I AM THE BEST" + let normalized_reward = if raw_reward > f64::EPSILON { + raw_reward / self.signal_stats.scale() + } else { + // Zero reward strategy: don't penalize, just indicate no progress + // Allows variance in "good" operators to keep them alive vs "bad" operators + 0.0 + }; + let normalized_feedback = SearchFeedback { + sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, + slot_idx: feedback.slot_idx, + solution: None, + }; + + // 3. Update Contextual Slot Machine + let from = &feedback.sample.transition.0; + let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); + slots[feedback.slot_idx].0.update(&normalized_feedback); + + // 4. Observe telemetry + self.tracker.observe_sample(generation, feedback.sample.clone()); } /// Updates statistics about heuristic internal parameters. @@ -279,18 +312,36 @@ where } self.slot_machines.iter().for_each(|(state, slots)| { - slots.iter().map(|(slot, name)| (name.clone(), slot.get_params())).for_each( - |(name, (alpha, beta, mu, v, n))| { - self.tracker.observe_params( - generation, - HeuristicSample { state: state.clone(), name, alpha, beta, mu, v, n }, - ); - }, - ) + slots.iter().for_each(|(slot, name)| { + let (alpha, beta, mu, v, n) = slot.get_params(); + self.tracker.observe_params( + generation, + HeuristicSample { state: state.clone(), name: name.clone(), alpha, beta, mu, v, n }, + ); + }); }); } } +/// Sample of search telemetry. +#[derive(Clone)] +struct SearchSample { + name: String, + duration: usize, + reward: Float, + transition: (SearchState, SearchState), +} + +struct HeuristicSample { + state: SearchState, + name: String, + alpha: Float, + beta: Float, + mu: Float, + v: Float, + n: usize, +} + impl Display for DynamicSelective where C: HeuristicContext, @@ -448,26 +499,46 @@ where value * sign * priority_amplifier } -/// Sample of search telemetry. +/// Signal tracker that observes ONLY positive values to establish baseline for "Success". +/// Uses exponential moving average for stability in sparse signals. #[derive(Clone)] -struct SearchSample { - name: String, - duration: usize, - reward: Float, - transition: (SearchState, SearchState), +struct SignalStats { + mean: Float, + n: Float, } -struct HeuristicSample { - state: SearchState, - name: String, - alpha: Float, - beta: Float, - mu: Float, - v: Float, - n: usize, +impl SignalStats { + fn new() -> Self { + Self { mean: 0.0, n: 0.0 } + } + + /// Observe ONLY positive values to establish a baseline for "Success" + fn update(&mut self, value: Float) { + if value <= f64::EPSILON { + return; + } + + // Horizon: Adapt to the scale of the last ~200 SUCCESSFUL operations + // This is structural (adaptation speed), not problem-specific. + let window_size = 200.0; + let decay = 1.0 - (1.0 / window_size); + + self.n = self.n * decay + 1.0; + + // Exponential Moving Average of the Magnitude + // We use this instead of Welford's variance for stability in sparse signals + let learning_rate = 1.0 / self.n; + self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate; + } + + /// Returns the scale factor. + /// If we haven't seen enough data, return 1.0 to avoid division by zero. + fn scale(&self) -> Float { + if self.mean < f64::EPSILON { 1.0 } else { self.mean } + } } -/// Provides way to track heuristic's telemetry and duration median estimation. 
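
// To see the "ruler" behaviour of `SignalStats` above in isolation, a minimal
// standalone sketch (the reward magnitudes and the mid-stream shift are made up):

fn main() {
    let (mut mean, mut n) = (0.0_f64, 0.0_f64);
    let decay = 1.0 - 1.0 / 200.0; // same ~200-sample horizon as above

    for i in 0..1_000 {
        let value = if i < 500 { 2.0 } else { 8.0 }; // success magnitude shifts mid-run
        n = n * decay + 1.0; // effective sample count, saturating near 200
        let lr = 1.0 / n;
        mean = mean * (1.0 - lr) + value * lr;
    }

    // 500 samples after the shift: mean ≈ 8.0 - 6.0 * exp(-2.5) ≈ 7.5,
    // so the scale tracks the new magnitude instead of the stale 2.0.
    println!("scale ≈ {mean:.2}");
}
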
+/// Enhanced diagnostic tracker for Thompson sampling analysis struct HeuristicTracker { total_median: RemedianUsize, search_telemetry: Vec<(usize, SearchSample)>, @@ -476,6 +547,16 @@ struct HeuristicTracker { } impl HeuristicTracker { + /// Creates new tracker with diagnostic configuration + pub fn new(is_experimental: bool) -> Self { + Self { + total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), + search_telemetry: Default::default(), + heuristic_telemetry: Default::default(), + is_experimental, + } + } + /// Returns true if telemetry is enabled. pub fn telemetry_enabled(&self) -> bool { self.is_experimental diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 7d4958e89..814091be0 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -87,17 +87,21 @@ fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: u fn can_display_heuristic_info() { let is_experimental = true; let environment = Environment { is_experimental, ..Environment::default() }; - let duration = 1; - let reward = 1.; - let transition = (SearchState::Diverse, SearchState::BestKnown); - let mut heuristic = + let heuristic = DynamicSelective::::new(vec![], vec![], &environment); - heuristic.agent.tracker.observe_sample(1, SearchSample { name: "name1".to_string(), duration, reward, transition }); + // Test that diagnostic system is properly initialized + assert_eq!(heuristic.agent.tracker.telemetry_enabled(), is_experimental); let formatted = format!("{heuristic}"); - assert!(!formatted.is_empty()); + // Should contain TELEMETRY section when experimental mode is enabled + if is_experimental { + assert!(formatted.contains("TELEMETRY")); + } else { + // When not experimental, should be empty or minimal + assert!(formatted.is_empty() || !formatted.contains("thompson_diagnostics:")); + } } #[test] From 604c7e1ce5bd4ea8b7f240b242d6d1cda2343d10 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 13:15:40 +0100 Subject: [PATCH 12/30] Change reward multiplier calculations --- rosomaxa/src/hyper/dynamic_selective.rs | 70 ++++++++++++++++++------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 2d28648d9..92da75a38 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -423,36 +423,66 @@ where .unwrap_or(0.) } -/// Estimates performance of used operation based on its duration and overall improvement statistics. -/// Returns a reward multiplier in `(~0.5, 3]` range. +/// Estimates performance of the used operation based on its duration and the current search phase. +/// Returns a reward multiplier in the range `[0.8, 1.2]`. +/// +/// # Strategy: Context-Aware Time Dilation +/// +/// This function balances **Throughput (Speed)** vs. **Depth (Quality)** by adapting to the +/// current `improvement_1000_ratio`: +/// +/// 1. **Flow State (High Improvement):** When the solver is finding frequent improvements, +/// we prioritize **Efficiency**. Fast operators are rewarded, and slow operators are penalized. +/// This maximizes the generation of diverse individuals to populate the pool. +/// 2. **Stagnation (Low Improvement):** When the solver is stuck, we prioritize **Power**. +/// The time penalty is dampened or removed. 
This ensures that "Heavy" operators (e.g., +/// complex Ruin & Recreate), which are naturally slower but capable of escaping local optima, +/// are not unfairly penalized against faster, ineffective local search operators. +/// +/// # Logic +/// +/// * **Continuous Scaling:** Uses a logarithmic curve to avoid artificial "cliffs" in reward. +/// * **Phase Damping:** The time modifier is scaled by the improvement ratio. +/// * **Safety Clamp:** The final multiplier is bounded to `+/- 20%` to ensure that execution +/// time never overrides the actual quality signal (distance improvement). fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, O: HeuristicObjective, S: HeuristicSolution, { - let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio; + let stats = search_ctx.heuristic_ctx.statistics(); + let improvement_ratio = stats.improvement_1000_ratio; + let approx_median = &search_ctx.approx_median; - let median_ratio = - approx_median.map_or(1., |median| if median == 0 { 1. } else { duration as Float / median as Float }); - - let median_ratio = match median_ratio.clamp(0.5, 2.) { - ratio if ratio < 0.75 => 1.5, // Allegro - ratio if ratio < 1. => 1.25, // Allegretto - ratio if ratio > 1.5 => 0.75, // Andante - _ => 1., // Moderato + let median = match approx_median { + Some(m) if *m > 0 => *m as Float, + _ => return 1.0, }; - let improvement_ratio = match improvement_ratio { - // stagnation: increase reward - ratio if ratio < 0.05 => 2., - // fast convergence: decrease reward - ratio if ratio > 0.150 => 0.75, - // moderate convergence - _ => 1., - }; + let time_ratio = duration as Float / median; + + // 1. Calculate the raw time modifier (Logarithmic) + // Fast (0.5x) -> +0.15 reward + // Slow (2.0x) -> -0.15 reward + // We use a gentle curve so we don't distort the signal too much. + let raw_modifier = (1.0 / time_ratio).ln() * 0.15; + + // 2. Apply Phase-Dependent Damping + // If improvement is HIGH (> 0.1), we care about speed. Damping = 1.0. + // If improvement is LOW (< 0.001), we ignore speed. Damping = 0.0. + // This allows slow, heavy operators to "catch up" in ranking when the easy gains are gone. + + // Smooth transition from 0.0 to 1.0 based on improvement ratio + // We saturate at 0.1 (10% improvement is considered "Fast Flow") + let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0); + + // Apply damping + let final_modifier = raw_modifier * phase_damping; - median_ratio * improvement_ratio + // 3. Final Safety Clamp + // Ensure we never boost/penalize by more than 20%, regardless of how extreme the time is. 
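    // A quick numeric check of the curve above (illustrative values; with
    // improvement_ratio = 0.1 the damping is exactly 1.0):
    //   time_ratio 0.5 (2x faster)  -> ln(2.0)*0.15 ≈ +0.104 -> multiplier ≈ 1.104
    //   time_ratio 2.0 (2x slower)  -> ln(0.5)*0.15 ≈ -0.104 -> multiplier ≈ 0.896
    //   time_ratio 0.1 (10x faster) -> raw ≈ +0.345, clamped  -> multiplier = 1.2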
+ (1.0 + final_modifier).clamp(0.8, 1.2) } /// Returns distance in `[-N., N]` where: From 0cc6ed623e101f45c0ae25a4187c383b1e875d2c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 17:05:40 +0100 Subject: [PATCH 13/30] Improve predictability of dynamic heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 10 +- rosomaxa/src/hyper/dynamic_selective.rs | 177 ++++++++++++++++----- 2 files changed, 139 insertions(+), 48 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index a3cac2732..cb314b0fe 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -61,12 +61,12 @@ where let alpha = 2.0; let beta = 1.0; - // Prior mean is clamped to a reasonable Z-score range (-1 to 1) to prevent - // initialization bias, though typically 0.0 is used for centered data. - let mu = prior_mean.min(1.0).max(-1.0); + // Prior mean is clamped to a reasonable Z-score range (-0 to 10) + let mu = prior_mean.max(0.01).min(10.0); - // Variance expectation for Inverse-Gamma: Beta / (Alpha - 1) - // With Alpha=2.0, this results in v=1.0. + // Variance expectation v = Beta / (Alpha - 1) = 1.0. + // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, + // which allows the bandit to explore even if one operator starts ahead. let v = beta / (alpha - 1.0); Self { n: 0, alpha, beta, mu, v, action, sampler } diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 92da75a38..3586d4520 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -105,6 +105,60 @@ where /// Type alias for slot machines used in Thompson sampling pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; +/// Centralized "Physics" for the Reward System. +/// +/// This struct defines the constants that control how rewards are calculated, +/// scaled, and clamped. Changing values here automatically propagates to +/// reward estimation and signal normalization logic. +struct SearchRewards; + +impl SearchRewards { + /// The base value for a standard improvement (Local Search success). + /// Used as the atomic unit for signal normalization. + pub const BASE_REWARD: Float = 2.0; + + /// The offset added to relative distance to ensure positive rewards. + /// Logic: `(distance + OFFSET)`. + pub const DISTANCE_OFFSET: Float = 1.0; + + /// Multiplier applied when a solution improves the Global Best Known. + /// This is the "Jackpot" factor. + pub const GLOBAL_BEST_MULTIPLIER: Float = 2.5; + + /// Multiplier applied when a solution is diverse but not a global improvement. + /// Keeps "Diverse" operators alive but with low signal. + pub const DIVERSE_MULTIPLIER: Float = 0.05; + + /// The maximum percentage (+/-) that execution duration can affect the reward. + /// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. + pub const PERF_TOLERANCE: Float = 0.2; + + /// The "Cheap Failure" + /// The penalty applied when an operator produces no improvement. + /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". + /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. + pub const PENALTY_MIN: Float = -0.01; + + /// The "Expensive Failure" + /// The penalty applied when an operator produces a negative outcome. + /// A larger negative value ensures that failures are strongly discouraged. 
+ pub const PENALTY_MAX: Float = -0.1; + + /// Calculates the ratio between the "Jackpot" and the "Normal" signal. + /// Used by the Agent to determine how many times larger than the average + /// a signal must be to be considered an "Outlier" that needs clamping. + pub fn signal_clamp_ratio(max_dist: Float) -> Float { + // Recalculate based on new constants + let base_term = max_dist + Self::DISTANCE_OFFSET; + let global_term = base_term * Self::GLOBAL_BEST_MULTIPLIER; + let max_theoretical = (base_term + global_term) * (1.0 + Self::PERF_TOLERANCE); + + let typical_reward = (1.0 + Self::DISTANCE_OFFSET) * Self::BASE_REWARD; + + max_theoretical / typical_reward + } +} + #[derive(PartialEq, Eq, Hash, Clone, Debug)] /// Search state for Thompson sampling pub enum SearchState { @@ -161,13 +215,42 @@ where let (new_solution, duration) = Timer::measure_duration(|| self.operator.search(context.heuristic_ctx, context.solution)); - let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let duration = duration.as_millis() as usize; - let base_reward = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - let reward_multiplier = estimate_reward_perf_multiplier(&context, duration); - let reward = base_reward * reward_multiplier; + // 1. Analyze Result + let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); + // 2. Calculate Final Reward / Penalty + let reward = match distance_score { + // SUCCESS: Apply performance multiplier (Bonus/Penalty within +/- 20%) + Some(base_score) => { + let mult = estimate_reward_perf_multiplier(&context, duration); + base_score * mult + } + // FAILURE: Apply Time-Weighted Penalty + None => { + // Get median (defensive default to duration itself to avoid div/0) + let median = context.approx_median.unwrap_or(duration).max(1) as Float; + let ratio = duration as Float / median; + + // Logic: + // Ratio 0.2 (Fast) -> -0.1 * 0.2 = -0.02 (Too small, clamp to MIN) -> -0.1 + // Ratio 1.0 (Avg) -> -0.1 * 1.0 = -0.1 + // Ratio 5.0 (Slow) -> -0.1 * 5.0 = -0.5 + // Ratio 20.0 (Very Slow) -> -2.0 (Clamp to MAX) -> -1.0 + + // Base penalty unit: -0.1 per "Median Unit of Time" + let raw_penalty = SearchRewards::PENALTY_MIN * ratio; + + // Clamp to the range [-1.0, -0.1] + // Note: min/max semantics with negative numbers: + // max(-1.0) ensures we don't go below -1. + // min(-0.1) ensures we don't go above -0.1. + raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) + } + }; + + let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; let transition = (context.from, to); @@ -207,10 +290,17 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Calculate scaling factor to normalize operator weights to target range - let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); - const TARGET_MAX_PRIOR: Float = 4.0; - let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; + // Calculate the Mean Weight + // We want the "Average Operator" to have a prior mu = 1.0. + // This aligns with SignalStats which normalizes the average reward to 1.0. 
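        // Illustrative numbers (hypothetical weights): for operators weighted
        // [7, 5, 4, 2] the average is 4.5, so scale = 2.0 / 4.5 ≈ 0.44 and the
        // priors become ≈ [3.1, 2.2, 1.8, 0.9]: the configured hierarchy is
        // preserved while the average prior lands on BASE_REWARD.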
+ let total_weight: Float = search_operators.iter().map(|(_, _, w)| *w).sum(); + let count = search_operators.len() as Float; + + let avg_weight = if count > 0.0 { total_weight / count } else { 1.0 }; + + // Avoid division by zero if weights are weird + let target_mean = SearchRewards::BASE_REWARD; + let scale = if avg_weight > f64::EPSILON { target_mean / avg_weight } else { target_mean }; // Factory function to create identical slot configurations for each state let create_slots = || { @@ -270,26 +360,21 @@ where /// Updates estimations based on search feedback with protected signal baseline. pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { + let max_dist = feedback.solution.as_ref().map_or(1., |s| s.fitness().count() as Float); let raw_reward = feedback.sample.reward; let current_scale = self.signal_stats.scale(); - // 1. Update Shared Signal Baseline (Protected) - // Clamp observation to 3x current scale to prevent a single - // "Best Known Jackpot" from disrupting baseline for Diverse state + // 1. Update Ruler (PHYSICS) + // We only update the "Scale of Success" based on positive outcomes. + // We do NOT shrink the ruler just because we failed. if raw_reward > f64::EPSILON { - self.signal_stats.update(raw_reward.min(current_scale * 3.0)); + let clamp_limit = current_scale * SearchRewards::signal_clamp_ratio(max_dist); + self.signal_stats.update(raw_reward.min(clamp_limit)); } - // 2. Normalize for Contextual Learning (Uncapped) - // If we hit a jackpot (e.g., reward 60.0 vs scale 1.0), we pass the full 60.0. - // This ensures the specific slot machine learns "I AM THE BEST" - let normalized_reward = if raw_reward > f64::EPSILON { - raw_reward / self.signal_stats.scale() - } else { - // Zero reward strategy: don't penalize, just indicate no progress - // Allows variance in "good" operators to keep them alive vs "bad" operators - 0.0 - }; + // 2. Normalize (Adaptive) + let normalized_reward = + if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; let normalized_feedback = SearchFeedback { sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, slot_idx: feedback.slot_idx, @@ -390,10 +475,8 @@ where } /// Estimates new solution discovery reward based on distance metric. -/// Returns a reward estimation in `[0, 6]` range. This range consists of: -/// - a initial distance improvement gives `[0, 2]` -/// - a best known improvement gives `[0, 2]` * BEST_DISCOVERY_REWARD_MULTIPLIER -fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Float +/// Returns `Some(reward)` for improvement, or `None` for stagnation. 
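
// For a single-objective problem (max_dist = 1) the clamp ratio used in `update`
// above works out to ((1+1) + (1+1)*2.5) * 1.2 / ((1+1)*2) = 8.4 / 4 = 2.1, so a
// single observation can stretch the baseline by at most ~2.1x its current scale:
// a jackpot from the reward function below cannot wreck the ruler in one step.
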
+fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option where C: HeuristicContext, O: HeuristicObjective, @@ -403,24 +486,30 @@ where .ranked() .next() .map(|best_known| { - const BEST_DISCOVERY_REWARD_MULTIPLIER: Float = 2.; - const DIVERSE_DISCOVERY_REWARD_MULTIPLIER: Float = 0.05; - let objective = heuristic_ctx.objective(); + // Calculate normalized relative distances [0, 1] let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // NOTE remap distances to range [0, 2] + // Reward Components (Max ~4.0 for Local, ~10.0 for Global) + let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; + let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) + * SearchRewards::BASE_REWARD + * SearchRewards::GLOBAL_BEST_MULTIPLIER; + match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { - (Ordering::Greater, Ordering::Greater) => { - (distance_initial + 1.) + (distance_best + 1.) * BEST_DISCOVERY_REWARD_MULTIPLIER - } - (Ordering::Greater, _) => (distance_initial + 1.) * DIVERSE_DISCOVERY_REWARD_MULTIPLIER, - _ => 0., + // Global Jackpot + (Ordering::Greater, Ordering::Greater) => Some(reward_initial + reward_best), + + // Local/Diverse Improvement + (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), + + // Stagnation + _ => None, } }) - .unwrap_or(0.) + .unwrap_or(None) } /// Estimates performance of the used operation based on its duration and the current search phase. @@ -480,14 +569,14 @@ where // Apply damping let final_modifier = raw_modifier * phase_damping; - // 3. Final Safety Clamp - // Ensure we never boost/penalize by more than 20%, regardless of how extreme the time is. - (1.0 + final_modifier).clamp(0.8, 1.2) + // 3. Final Safety Clamp defined by Reward Physics + let tolerance = SearchRewards::PERF_TOLERANCE; + (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance) } -/// Returns distance in `[-N., N]` where: -/// - N: in range `[1, total amount of objectives]` -/// - sign specifies whether a solution is better (positive) or worse (negative). +/// Returns normalized distance in `[0.0, 1.0]` (absolute magnitude) +/// where 1.0 = Improvement on Primary Objective. +/// Returns negative value if worse. 
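
// Plugging the constants into the composition above (illustrative distances):
// a solution that is 0.3 better than its parent and 0.1 better than the best
// known earns (0.3+1)*2 + (0.1+1)*2*2.5 = 2.6 + 5.5 = 8.1, while the same local
// gain without a global improvement earns only 2.6 * 0.05 = 0.13. Jackpots
// dominate; diverse progress barely registers but stays positive.
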
fn get_relative_distance(objective: &O, a: &S, b: &S) -> Float where O: HeuristicObjective, @@ -517,7 +606,9 @@ where let total_objectives = a.fitness().count(); assert_ne!(total_objectives, 0, "cannot have an empty objective here"); assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives"); - let priority_amplifier = (total_objectives - idx) as Float; + + // Normalization: Divide by total_objectives to map to [0, 1] + let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float; let value = a .fitness() From 647d4e6343c097073ce00c9258d5cdd7d11f27ed Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 18:23:55 +0100 Subject: [PATCH 14/30] Improve comments --- rosomaxa/src/hyper/dynamic_selective.rs | 234 +++++++++++------------- 1 file changed, 104 insertions(+), 130 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 3586d4520..3fec3874f 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -24,7 +24,7 @@ pub type HeuristicDiversifyOperators = /// An experimental dynamic selective hyper heuristic which selects inner heuristics /// based on how they work during the search. The selection process is modeled using reinforcement -/// learning technics. +/// learning techniques. pub struct DynamicSelective where C: HeuristicContext, @@ -102,7 +102,7 @@ where } } -/// Type alias for slot machines used in Thompson sampling +/// Type alias for slot machines used in Thompson sampling. pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; /// Centralized "Physics" for the Reward System. @@ -159,12 +159,12 @@ impl SearchRewards { } } +/// Search state for Thompson sampling. #[derive(PartialEq, Eq, Hash, Clone, Debug)] -/// Search state for Thompson sampling pub enum SearchState { - /// Best known solution state + /// Best known solution state. BestKnown, - /// Diverse solution state + /// Diverse solution state. Diverse, } @@ -177,7 +177,7 @@ impl Display for SearchState { } } -/// Search feedback result for Thompson sampling +/// Search feedback result for Thompson sampling. pub struct SearchFeedback { sample: SearchSample, slot_idx: usize, @@ -190,7 +190,7 @@ impl SlotFeedback for SearchFeedback { } } -/// Search action wrapper for Thompson sampling +/// Search action wrapper for Thompson sampling. pub struct SearchAction<'a, C, O, S> { operator: Arc + Send + Sync + 'a>, operator_name: String, @@ -217,35 +217,36 @@ where let duration = duration.as_millis() as usize; - // 1. Analyze Result + // 1. Analyze result. let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - // 2. Calculate Final Reward / Penalty + // 2. Calculate final reward / penalty. let reward = match distance_score { - // SUCCESS: Apply performance multiplier (Bonus/Penalty within +/- 20%) + // Success: Apply performance multiplier (bonus/penalty within +/- 20%). Some(base_score) => { let mult = estimate_reward_perf_multiplier(&context, duration); base_score * mult } - // FAILURE: Apply Time-Weighted Penalty + // Failure: Apply time-weighted penalty. None => { - // Get median (defensive default to duration itself to avoid div/0) + // Get median (defensive default to duration itself to avoid div/0). 
let median = context.approx_median.unwrap_or(duration).max(1) as Float; let ratio = duration as Float / median; // Logic: - // Ratio 0.2 (Fast) -> -0.1 * 0.2 = -0.02 (Too small, clamp to MIN) -> -0.1 - // Ratio 1.0 (Avg) -> -0.1 * 1.0 = -0.1 - // Ratio 5.0 (Slow) -> -0.1 * 5.0 = -0.5 - // Ratio 20.0 (Very Slow) -> -2.0 (Clamp to MAX) -> -1.0 + // Ratio 0.2 (Fast) -> -0.01 * 0.2 = -0.002 (Too small, clamp to MIN) -> -0.01 + // Ratio 1.0 (Avg) -> -0.01 * 1.0 = -0.01 + // Ratio 5.0 (Slow) -> -0.01 * 5.0 = -0.05 + // Ratio 10.0 (Slower) -> -0.01 * 10.0 = -0.1 (At MAX boundary) + // Ratio 20.0 (Very Slow) -> -0.01 * 20.0 = -0.2 (Clamp to MAX) -> -0.1 - // Base penalty unit: -0.1 per "Median Unit of Time" + // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". let raw_penalty = SearchRewards::PENALTY_MIN * ratio; - // Clamp to the range [-1.0, -0.1] + // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. // Note: min/max semantics with negative numbers: - // max(-1.0) ensures we don't go below -1. - // min(-0.1) ensures we don't go above -0.1. + // - max(PENALTY_MAX) ensures we don't go below -0.1 (more negative). + // - min(PENALTY_MIN) ensures we don't go above -0.01 (less negative). raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) } }; @@ -260,7 +261,7 @@ where } } -/// Search context for Thompson sampling +/// Search context for Thompson sampling. pub struct SearchContext<'a, C, O, S> where C: HeuristicContext, @@ -333,16 +334,16 @@ where } } - /// Picks relevant search operator based on learnings and runs the search. + /// Picks the relevant search operator based on learnings and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { - // Determine search context - critical for operator selection + // Determine search context - critical for operator selection. let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { SearchState::BestKnown } else { SearchState::Diverse }; - // Get contextually appropriate slot machines + // Get contextually appropriate slot machines. let (slot_idx, slot_machine) = self .slot_machines .get(&from) @@ -354,7 +355,7 @@ where let approx_median = self.tracker.approx_median(); - // Execute with full context information + // Execute with full context information. slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } @@ -364,7 +365,7 @@ where let raw_reward = feedback.sample.reward; let current_scale = self.signal_stats.scale(); - // 1. Update Ruler (PHYSICS) + // 1. Update ruler. // We only update the "Scale of Success" based on positive outcomes. // We do NOT shrink the ruler just because we failed. if raw_reward > f64::EPSILON { @@ -372,7 +373,7 @@ where self.signal_stats.update(raw_reward.min(clamp_limit)); } - // 2. Normalize (Adaptive) + // 2. Normalize. let normalized_reward = if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; let normalized_feedback = SearchFeedback { @@ -381,12 +382,12 @@ where solution: None, }; - // 3. Update Contextual Slot Machine + // 3. Update contextual slot machine. let from = &feedback.sample.transition.0; let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); slots[feedback.slot_idx].0.update(&normalized_feedback); - // 4. Observe telemetry + // 4. Observe telemetry. self.tracker.observe_sample(generation, feedback.sample.clone()); } @@ -408,59 +409,6 @@ where } } -/// Sample of search telemetry. 
-#[derive(Clone)] -struct SearchSample { - name: String, - duration: usize, - reward: Float, - transition: (SearchState, SearchState), -} - -struct HeuristicSample { - state: SearchState, - name: String, - alpha: Float, - beta: Float, - mu: Float, - v: Float, - n: usize, -} - -impl Display for DynamicSelective -where - C: HeuristicContext, - O: HeuristicObjective, - S: HeuristicSolution, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if !self.agent.tracker.telemetry_enabled() { - return Ok(()); - } - - f.write_fmt(format_args!("TELEMETRY\n"))?; - f.write_fmt(format_args!("search:\n"))?; - f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; - for (generation, sample) in self.agent.tracker.search_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{}\n", - sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration - ))?; - } - - f.write_fmt(format_args!("heuristic:\n"))?; - f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; - for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{},{},{}\n", - generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n - ))?; - } - - Ok(()) - } -} - fn compare_to_best(heuristic_ctx: &C, solution: &S) -> Ordering where C: HeuristicContext, @@ -488,11 +436,11 @@ where .map(|best_known| { let objective = heuristic_ctx.objective(); - // Calculate normalized relative distances [0, 1] + // Calculate normalized relative distances [0, 1]. let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // Reward Components (Max ~4.0 for Local, ~10.0 for Global) + // Reward components (max ~4.0 for local, ~10.0 for global). let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD @@ -514,26 +462,6 @@ where /// Estimates performance of the used operation based on its duration and the current search phase. /// Returns a reward multiplier in the range `[0.8, 1.2]`. -/// -/// # Strategy: Context-Aware Time Dilation -/// -/// This function balances **Throughput (Speed)** vs. **Depth (Quality)** by adapting to the -/// current `improvement_1000_ratio`: -/// -/// 1. **Flow State (High Improvement):** When the solver is finding frequent improvements, -/// we prioritize **Efficiency**. Fast operators are rewarded, and slow operators are penalized. -/// This maximizes the generation of diverse individuals to populate the pool. -/// 2. **Stagnation (Low Improvement):** When the solver is stuck, we prioritize **Power**. -/// The time penalty is dampened or removed. This ensures that "Heavy" operators (e.g., -/// complex Ruin & Recreate), which are naturally slower but capable of escaping local optima, -/// are not unfairly penalized against faster, ineffective local search operators. -/// -/// # Logic -/// -/// * **Continuous Scaling:** Uses a logarithmic curve to avoid artificial "cliffs" in reward. -/// * **Phase Damping:** The time modifier is scaled by the improvement ratio. -/// * **Safety Clamp:** The final multiplier is bounded to `+/- 20%` to ensure that execution -/// time never overrides the actual quality signal (distance improvement). 
fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, @@ -551,32 +479,23 @@ where let time_ratio = duration as Float / median; - // 1. Calculate the raw time modifier (Logarithmic) - // Fast (0.5x) -> +0.15 reward - // Slow (2.0x) -> -0.15 reward - // We use a gentle curve so we don't distort the signal too much. + // Calculate the raw time modifier (logarithmic). let raw_modifier = (1.0 / time_ratio).ln() * 0.15; - // 2. Apply Phase-Dependent Damping - // If improvement is HIGH (> 0.1), we care about speed. Damping = 1.0. - // If improvement is LOW (< 0.001), we ignore speed. Damping = 0.0. - // This allows slow, heavy operators to "catch up" in ranking when the easy gains are gone. - - // Smooth transition from 0.0 to 1.0 based on improvement ratio - // We saturate at 0.1 (10% improvement is considered "Fast Flow") + // Apply phase-dependent damping. + // Smooth transition from 0.0 to 1.0 based on improvement ratio. + // We saturate at 0.1 (10% improvement is considered "Fast Flow"). let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0); - - // Apply damping let final_modifier = raw_modifier * phase_damping; - // 3. Final Safety Clamp defined by Reward Physics + // Final safety clamp defined by reward physics. let tolerance = SearchRewards::PERF_TOLERANCE; (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance) } -/// Returns normalized distance in `[0.0, 1.0]` (absolute magnitude) +/// Returns the normalized distance in `[0.0, 1.0]` (absolute magnitude) /// where 1.0 = Improvement on Primary Objective. -/// Returns negative value if worse. +/// Returns a negative value if worse. fn get_relative_distance(objective: &O, a: &S, b: &S) -> Float where O: HeuristicObjective, @@ -607,7 +526,7 @@ where assert_ne!(total_objectives, 0, "cannot have an empty objective here"); assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives"); - // Normalization: Divide by total_objectives to map to [0, 1] + // Normalization: Divide by total_objectives to map to [0, 1]. let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float; let value = a @@ -620,7 +539,7 @@ where value * sign * priority_amplifier } -/// Signal tracker that observes ONLY positive values to establish baseline for "Success". +/// Signal tracker that observes ONLY positive values to establish a baseline for "Success". /// Uses exponential moving average for stability in sparse signals. #[derive(Clone)] struct SignalStats { @@ -633,21 +552,21 @@ impl SignalStats { Self { mean: 0.0, n: 0.0 } } - /// Observe ONLY positive values to establish a baseline for "Success" + /// Observes ONLY positive values to establish a baseline for "Success". fn update(&mut self, value: Float) { if value <= f64::EPSILON { return; } - // Horizon: Adapt to the scale of the last ~200 SUCCESSFUL operations + // Horizon: Adapt to the scale of the last ~200 successful operations. // This is structural (adaptation speed), not problem-specific. let window_size = 200.0; let decay = 1.0 - (1.0 / window_size); self.n = self.n * decay + 1.0; - // Exponential Moving Average of the Magnitude - // We use this instead of Welford's variance for stability in sparse signals + // Exponential moving average of the magnitude. + // We use this instead of Welford's variance for stability in sparse signals. 
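        // (Likely motivation: with sparse, heavily skewed rewards a running
        // variance estimate would be dominated by a handful of spikes, whereas
        // tracking only the mean magnitude keeps the ruler smooth and cheap.)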
let learning_rate = 1.0 / self.n; self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate; } @@ -659,7 +578,7 @@ impl SignalStats { } } -/// Enhanced diagnostic tracker for Thompson sampling analysis +/// Enhanced diagnostic tracker for Thompson sampling analysis. struct HeuristicTracker { total_median: RemedianUsize, search_telemetry: Vec<(usize, SearchSample)>, @@ -668,7 +587,7 @@ struct HeuristicTracker { } impl HeuristicTracker { - /// Creates new tracker with diagnostic configuration + /// Creates a new tracker with diagnostic configuration. pub fn new(is_experimental: bool) -> Self { Self { total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), @@ -683,12 +602,12 @@ impl HeuristicTracker { self.is_experimental } - /// Returns median approximation. + /// Returns the median approximation. pub fn approx_median(&self) -> Option { self.total_median.approx_median() } - /// Observes a current sample. Updates total duration median. + /// Observes the current sample and updates the total duration median. pub fn observe_sample(&mut self, generation: usize, sample: SearchSample) { self.total_median.add_observation(sample.duration); if self.telemetry_enabled() { @@ -696,9 +615,64 @@ impl HeuristicTracker { } } + /// Observes heuristic parameters for telemetry tracking. pub fn observe_params(&mut self, generation: usize, sample: HeuristicSample) { if self.telemetry_enabled() { self.heuristic_telemetry.push((generation, sample)); } } } + +/// A sample of search telemetry. +#[derive(Clone)] +struct SearchSample { + name: String, + duration: usize, + reward: Float, + transition: (SearchState, SearchState), +} + +/// A sample of heuristic parameters telemetry. +struct HeuristicSample { + state: SearchState, + name: String, + alpha: Float, + beta: Float, + mu: Float, + v: Float, + n: usize, +} + +impl Display for DynamicSelective +where + C: HeuristicContext, + O: HeuristicObjective, + S: HeuristicSolution, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.agent.tracker.telemetry_enabled() { + return Ok(()); + } + + f.write_fmt(format_args!("TELEMETRY\n"))?; + f.write_fmt(format_args!("search:\n"))?; + f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; + for (generation, sample) in self.agent.tracker.search_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration + ))?; + } + + f.write_fmt(format_args!("heuristic:\n"))?; + f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; + for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + + Ok(()) + } +} From 5ac3ce3913a87c76a4bab084b02773cd7da66a81 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:25:32 +0100 Subject: [PATCH 15/30] Improve heuristic operators list --- vrp-core/src/solver/heuristic.rs | 135 +++++++++++++------------------ 1 file changed, 56 insertions(+), 79 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index 0a0762b81..cc8c544f5 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -408,7 +408,6 @@ mod statik { (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 5), (Arc::new(RecreateWithFarthest::new(random.clone())), 2), 
(Arc::new(RecreateWithSkipBest::new(4, 8, random.clone())), 2), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), 1), (Arc::new(RecreateWithSlice::new(random.clone())), 1), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased( @@ -492,7 +491,7 @@ mod statik { mod dynamic { use super::*; - fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { + fn get_weighted_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ (cheapest.clone(), "cheapest".to_string(), 1.), @@ -502,7 +501,6 @@ mod dynamic { (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string(), 1.), (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string(), 1.), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string(), 1.), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest.clone(), random.clone())), "skip_random".to_string(), @@ -522,27 +520,35 @@ mod dynamic { .collect() } - fn get_ruins_with_limits(limits: RemovalLimits, prefix: &str) -> Vec<(Arc, String, Float)> { + /// Returns weighted ruin strategies that combine normal and small limits with 1:2 ratio. + /// This reduces operator count while preserving exploration of both limit ranges. + fn get_weighted_ruins( + problem: Arc, + normal_limits: RemovalLimits, + small_limits: RemovalLimits, + ) -> Vec<(Arc, String, Float)> { + // Helper to create weighted ruin combining normal and small limits (1:2 ratio, favoring small) + let create_weighted = |factory: fn(RemovalLimits) -> Arc| { + Arc::new(WeightedRuin::new(vec![(factory(normal_limits.clone()), 2), (factory(small_limits.clone()), 1)])) + as Arc + }; + vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 7.), - (Arc::new(NeighbourRemoval::new(limits.clone())), format!("{prefix}_neighbour_removal"), 5.), - (Arc::new(WorstJobRemoval::new(4, limits.clone())), format!("{prefix}_worst_job"), 4.), - (Arc::new(RandomJobRemoval::new(limits.clone())), format!("{prefix}_random_job_removal"), 4.), - (Arc::new(RandomRouteRemoval::new(limits.clone())), format!("{prefix}_random_route_removal"), 2.), - (Arc::new(CloseRouteRemoval::new(limits.clone())), format!("{prefix}_close_route_removal"), 4.), - (Arc::new(WorstRouteRemoval::new(limits)), format!("{prefix}_worst_route_removal"), 5.), + ( + create_weighted(|limits| Arc::new(AdjustedStringRemoval::new_with_defaults(limits))), + "asr".to_string(), + 7., + ), + (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 5.), + (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 5.), + (create_weighted(|limits| Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 4.), + (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 4.), + (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 4.), + (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 2.), + (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 4.), ] } - fn get_ruins(problem: Arc) -> Vec<(Arc, String, Float)> { - vec![( - // TODO avoid unwrap - 
Arc::new(ClusterRemoval::new_with_defaults(problem.clone()).unwrap()), - "cluster_removal".to_string(), - 4., - )] - } - fn get_mutations( problem: Arc, environment: Arc, @@ -596,17 +602,11 @@ mod dynamic { // NOTE: consider checking usage of names within heuristic filter before changing them - let recreates = get_recreates(problem.as_ref(), random.clone()); - - let ruins = get_ruins_with_limits(normal_limits.clone(), "normal") - .into_iter() - .chain(get_ruins_with_limits(small_limits.clone(), "small")) - .chain(get_ruins(problem.clone())) - .collect::>(); - + let recreates = get_weighted_recreates(problem.as_ref(), random.clone()); + let ruins = get_weighted_ruins(problem.clone(), normal_limits.clone(), small_limits.clone()); let extra_random_job = Arc::new(RandomJobRemoval::new(small_limits)); - // NOTE we need to wrap any of ruin methods in composite which calls restore context before recreate + // Wrap ruins in composite which calls restore context before recreate let ruins = ruins .into_iter() .map::<(Arc, String, Float), _>(|(ruin, name, weight)| { @@ -614,11 +614,7 @@ mod dynamic { }) .collect::>(); - let mutations = get_mutations(problem.clone(), environment.clone()); - - let heuristic_filter = problem.extras.get_heuristic_filter(); - - recreates + let ruin_recreate_ops = recreates .iter() .flat_map(|(recreate, recreate_name, recreate_weight)| { ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, ruin_weight)| { @@ -629,61 +625,39 @@ mod dynamic { ) }) }) + .collect::>(); + + let mutations = get_mutations(problem.clone(), environment.clone()); + let heuristic_filter = problem.extras.get_heuristic_filter(); + + ruin_recreate_ops + .into_iter() .chain(mutations) .filter(|(_, name, _)| heuristic_filter.as_ref().is_none_or(|filter| (filter)(name.as_str()))) .collect::>() } + /// Creates a default ruin-and-recreate operator for internal use (e.g., decompose search, infeasible search). + /// Uses weighted ruins and recreates from the main operator building logic. 
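
    // Scale note for the cross product above: every weighted recreate is paired
    // with every weighted ruin, so the portfolio grows multiplicatively and each
    // pair registers under a combined name, e.g. "cheapest" (1.0) with "asr" (7.0)
    // becomes "cheapest+asr" with weight 7.0. Keeping both lists short (as the
    // weighted-ruin merge above does) keeps the slot-machine count manageable.
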
pub fn create_default_inner_ruin_recreate( problem: Arc, environment: Arc, ) -> Arc { - let (_, small_limits) = get_limits(problem.as_ref()); + let (normal_limits, small_limits) = get_limits(problem.as_ref()); let random = environment.random.clone(); - // initialize recreate - let cheapest = Arc::new(RecreateWithCheapest::new(random.clone())); - let recreate = Arc::new(WeightedRecreate::new(vec![ - (cheapest.clone(), 1), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 3), - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 1), - (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 1), - (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 1), - (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 1), - (Arc::new(RecreateWithFarthest::new(random.clone())), 1), - (Arc::new(RecreateWithSlice::new(random.clone())), 1), - (Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest, random.clone())), 1), - ])); + let recreates = get_weighted_recreates(problem.as_ref(), random.clone()); + let ruins = get_weighted_ruins(problem.clone(), normal_limits, small_limits); - // initialize ruin - let random_route = Arc::new(RandomRouteRemoval::new(small_limits.clone())); - let random_job = Arc::new(RandomJobRemoval::new(small_limits.clone())); - let random_ruin = Arc::new(WeightedRuin::new(vec![(random_job.clone(), 10), (random_route.clone(), 1)])); - let ruin = Arc::new(WeightedRuin::new(vec![ - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 5.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(NeighbourRemoval::new(small_limits.clone())), 1.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(WorstJobRemoval::new(4, small_limits)), 1.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ])); + // Convert to weighted format (drop names, keep weights) + let weighted_ruins = ruins.into_iter().map(|(ruin, _, weight)| (ruin, weight as usize)).collect(); + let weighted_recreates = + recreates.into_iter().map(|(recreate, _, weight)| (recreate, weight as usize)).collect(); - Arc::new(RuinAndRecreate::new(ruin, recreate)) + Arc::new(RuinAndRecreate::new( + Arc::new(WeightedRuin::new(weighted_ruins)), + Arc::new(WeightedRecreate::new(weighted_recreates)), + )) } pub fn create_default_local_search(random: Arc) -> Arc { @@ -719,9 +693,12 @@ mod dynamic { } fn create_composite_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { - let limits = RemovalLimits { removed_activities_range: (10..100), affected_routes_range: 1..1 }; - let route_removal_operator = - Arc::new(RuinAndRecreate::new(Arc::new(RandomRouteRemoval::new(limits)), Arc::new(DummyRecreate))); + let limits = RemovalLimits { removed_activities_range: (10..100), affected_routes_range: 1..2 }; + let ruin = WeightedRuin::new(vec![ + (Arc::new(RandomRouteRemoval::new(limits.clone())), 1), + (Arc::new(WorstRouteRemoval::new(limits)), 1), + ]); + let route_removal_operator = Arc::new(RuinAndRecreate::new(Arc::new(ruin), Arc::new(DummyRecreate))); Arc::new(DecomposeSearch::new( Arc::new(CompositeHeuristicOperator::new(vec![ From bf82f5f7ca6ed2edffb7b712754be2f86c0febc0 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:45:54 +0100 Subject: [PATCH 16/30] Fix broken tests --- rosomaxa/src/hyper/dynamic_selective.rs | 4 +- .../unit/algorithms/rl/slot_machine_test.rs | 16 ++++--- 
.../unit/hyper/dynamic_selective_test.rs | 45 +++++++++++++++---- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 3fec3874f..314c0427c 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -468,9 +468,7 @@ where O: HeuristicObjective, S: HeuristicSolution, { - let stats = search_ctx.heuristic_ctx.statistics(); - let improvement_ratio = stats.improvement_1000_ratio; - + let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio; let approx_median = &search_ctx.approx_median; let median = match approx_median { Some(m) if *m > 0 => *m as Float, diff --git a/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs b/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs index b8c2b3c5b..9fc616356 100644 --- a/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs +++ b/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs @@ -1,6 +1,6 @@ use super::*; use crate::helpers::utils::create_test_random; -use crate::utils::{DefaultDistributionSampler, random_argmax}; +use crate::utils::DefaultDistributionSampler; #[derive(Clone)] struct TestAction(DefaultDistributionSampler); @@ -35,7 +35,7 @@ fn can_find_proper_estimations() { let slot_means: &[Float; 5] = &[5., 9., 7., 13., 11.]; let slot_vars: &[Float; 5] = &[2., 3., 4., 6., 1.]; let prior_mean = 1.; - let attempts = 1000; + let attempts_per_slot = 1000; let delta = 2.; let random = create_test_random(); @@ -44,11 +44,13 @@ fn can_find_proper_estimations() { .map(|_| SlotMachine::new(prior_mean, TestAction(sampler.clone()), sampler.clone())) .collect::>(); - for _ in 0..attempts { - let slot_idx = random_argmax(slots.iter().map(|slot| slot.sample()), random.as_ref()).unwrap(); - let slot = &mut slots[slot_idx]; - let feedback = slot.play((slot_means[slot_idx], slot_vars[slot_idx].sqrt())); - slot.update(&feedback); + // Play each slot independently to test estimation convergence + for slot_idx in 0..sockets { + for _ in 0..attempts_per_slot { + let slot = &mut slots[slot_idx]; + let feedback = slot.play((slot_means[slot_idx], slot_vars[slot_idx])); + slot.update(&feedback); + } } slots diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 814091be0..4d0449243 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -56,19 +56,46 @@ fn can_estimate_median() { assert!(median > 0); } -parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, expected), { - can_estimate_reward_multiplier_impl(approx_median, duration, expected); +parameterized_test! {can_estimate_reward_multiplier, (improvement_ratio, approx_median, duration, expected), { + can_estimate_reward_multiplier_impl(improvement_ratio, approx_median, duration, expected); }} can_estimate_reward_multiplier! 
{ - case_01_moderate: (Some(1), 1, 2.0), // 1.0 (moderate) * 2.0 (stagnation bonus) - case_02_allegro: (Some(2), 1, 3.0), // 1.5 (fast) * 2.0 (stagnation bonus) - case_03_allegretto: (Some(10), 8, 2.5), // 1.25 (fast-ish) * 2.0 (stagnation bonus) - case_04_andante: (Some(8), 13, 1.5), // 0.75 (slow) * 2.0 (stagnation bonus) + case_01_fast_with_improvement: (0.1, Some(10), 5, 1.104), // Fast operator during improvement: ln(2.0)*0.15*1.0 ≈ 0.104 + case_02_slow_with_improvement: (0.1, Some(10), 20, 0.896), // Slow operator during improvement: ln(0.5)*0.15*1.0 ≈ -0.104 + case_03_fast_partial_improvement: (0.05, Some(10), 5, 1.052), // Fast with 50% damping: ln(2.0)*0.15*0.5 ≈ 0.052 + case_04_no_improvement: (0.0, Some(10), 5, 1.0), // No improvement = no bonus + case_05_no_median: (0.1, None, 5, 1.0), // No median = baseline + case_06_clamped_fast: (0.1, Some(10), 1, 1.2), // Very fast but clamped to PERF_TOLERANCE (0.2) + case_07_clamped_slow: (0.1, Some(10), 100, 0.8), // Very slow but clamped to -PERF_TOLERANCE (-0.2) } -fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: usize, expected: Float) { - let heuristic_ctx = create_default_heuristic_context(); +fn can_estimate_reward_multiplier_impl( + improvement_ratio: Float, + approx_median: Option, + duration: usize, + expected: Float, +) { + // Create a mock context with the specified improvement ratio + let mut heuristic_ctx = create_default_heuristic_context(); + + // Simulate improvement ratio by triggering generations + if improvement_ratio > 0.0 { + let num_improvements = (improvement_ratio * 1000.0) as usize; + + // Add improving solutions + for i in 0..num_improvements { + let solution = VectorSolution::new(vec![], -(i as Float), vec![]); + heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start()); + } + + // Add non-improving solutions + for _ in num_improvements..1000 { + let solution = VectorSolution::new(vec![], 100.0, vec![]); + heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start()); + } + } + let solution = VectorSolution::new(vec![], 0., vec![]); let search_ctx = SearchContext { heuristic_ctx: &heuristic_ctx, @@ -80,7 +107,7 @@ fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: u let result = estimate_reward_perf_multiplier(&search_ctx, duration); - assert_eq!(result, expected); + assert!((result - expected).abs() < 0.001, "Expected {expected}, got {result}"); } #[test] From 0de8b355d525fbdff45812d478968169a2470bf4 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:47:20 +0100 Subject: [PATCH 17/30] Update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5b330fad..7020d457f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ are already published. So, I stick to it for now. 
* improved remedian algorithm * separate calculations for distance/duration from cost minimization * change GSOM distance function -* improved SISR implementation +* improve SISR implementation +* improve dynamic selective heuristic ### Added From ba8e3bae59166b6c505ee5e67588ee4ab0f6476a Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:54:05 +0100 Subject: [PATCH 18/30] Fix clippy warnings --- rosomaxa/src/algorithms/rl/slot_machine.rs | 2 +- rosomaxa/src/hyper/dynamic_selective.rs | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index cb314b0fe..32f20aa96 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -62,7 +62,7 @@ where let beta = 1.0; // Prior mean is clamped to a reasonable Z-score range (-0 to 10) - let mu = prior_mean.max(0.01).min(10.0); + let mu = prior_mean.clamp(0.01, 10.0); // Variance expectation v = Beta / (Alpha - 1) = 1.0. // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 314c0427c..f3a593f06 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -233,21 +233,11 @@ where let median = context.approx_median.unwrap_or(duration).max(1) as Float; let ratio = duration as Float / median; - // Logic: - // Ratio 0.2 (Fast) -> -0.01 * 0.2 = -0.002 (Too small, clamp to MIN) -> -0.01 - // Ratio 1.0 (Avg) -> -0.01 * 1.0 = -0.01 - // Ratio 5.0 (Slow) -> -0.01 * 5.0 = -0.05 - // Ratio 10.0 (Slower) -> -0.01 * 10.0 = -0.1 (At MAX boundary) - // Ratio 20.0 (Very Slow) -> -0.01 * 20.0 = -0.2 (Clamp to MAX) -> -0.1 - // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". let raw_penalty = SearchRewards::PENALTY_MIN * ratio; // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. - // Note: min/max semantics with negative numbers: - // - max(PENALTY_MAX) ensures we don't go below -0.1 (more negative). - // - min(PENALTY_MIN) ensures we don't go above -0.01 (less negative). - raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) + raw_penalty.clamp(SearchRewards::PENALTY_MAX, SearchRewards::PENALTY_MIN) } }; From a8328610a5724e2c13a92d5487557898bbb1efed Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Mon, 12 Jan 2026 22:59:15 +0100 Subject: [PATCH 19/30] Improve UI for heuristic playground --- experiments/heuristic-research/www/index.html | 163 ++++--- experiments/heuristic-research/www/index.js | 297 ++++++++---- experiments/heuristic-research/www/style.css | 432 +++++++++++++----- 3 files changed, 644 insertions(+), 248 deletions(-) diff --git a/experiments/heuristic-research/www/index.html b/experiments/heuristic-research/www/index.html index a37846797..d7ac391c9 100644 --- a/experiments/heuristic-research/www/index.html +++ b/experiments/heuristic-research/www/index.html @@ -10,106 +10,149 @@
 [editor's note: the body of this index.html hunk did not survive extraction — the markup
 was stripped, leaving bare "+"/"-" markers. Recoverable content: the page title changes
 from "Heuristic Research Playground" to "🔬 Heuristic Research Playground", and a
 "Loading WebAssembly..." status line is retained. Judging from the index.js changes in
 this same patch, the rewritten markup adds a benchmark-type selector (replacing the
 vertical Function/VRP tab controls), live value labels for the pitch/yaw sliders, a
 generation control with current/max generation labels, a max-generations input,
 auto/manual initial-point controls, and a status box.]
diff --git a/experiments/heuristic-research/www/index.js b/experiments/heuristic-research/www/index.js index 6c34caa6b..a1dcab44f 100644 --- a/experiments/heuristic-research/www/index.js +++ b/experiments/heuristic-research/www/index.js @@ -7,27 +7,33 @@ const bestCanvas = document.getElementById("bestCanvas"); const durationCanvas = document.getElementById("durationCanvas"); const fitnessCanvas = document.getElementById("fitnessCanvas"); -const coordLabel = document.getElementById("coordLabel"); +const benchmarkType = document.getElementById("benchmarkType"); +const functionControls = document.getElementById("functionControls"); +const vrpControls = document.getElementById("vrpControls"); const fileSelector = document.getElementById("fileSelector"); const plotPopulation = document.getElementById("plotPopulation"); const plotFunction = document.getElementById("plotFunction"); const vrpFormat = document.getElementById("vrpFormat"); const pitch = document.getElementById("pitch"); const yaw = document.getElementById("yaw"); +const pitchValue = document.getElementById("pitchValue"); +const yawValue = document.getElementById("yawValue"); const status = document.getElementById("status"); const run = document.getElementById("run"); const generations = document.getElementById("generations"); +const generationControl = document.getElementById("generationControl"); +const currentGen = document.getElementById("currentGen"); +const maxGen = document.getElementById("maxGen"); +const maxGenerations = document.getElementById("maxGenerations"); +const autoInitPoint = document.getElementById("autoInitPoint"); +const manualPointControls = document.getElementById("manualPointControls"); +const initX = document.getElementById("initX"); +const initZ = document.getElementById("initZ"); /** Main entry point */ export function main() { setupListeners(); - setupCanvas(solutionCanvas, 800); - setupCanvas(searchCanvas, 800); - setupCanvas(overallCanvas, 800); - setupCanvas(bestCanvas, 800); - setupCanvas(durationCanvas, 800); - setupCanvas(fitnessCanvas, 800); - + resizeAllCanvases(); updateDynamicPlots(); updateStaticPlots(); } @@ -43,27 +49,36 @@ export function setup(WasmChart, run_function_experiment, run_vrp_experiment, lo /** Add event listeners. 
*/ function setupListeners() { - status.innerText = "WebAssembly loaded!"; + status.innerText = "✓ WebAssembly loaded!"; + status.classList.add('success'); + + benchmarkType.addEventListener("change", switchBenchmarkType); fileSelector.addEventListener("change", openFile); plotFunction.addEventListener("change", changePlot); plotPopulation.addEventListener("change", changePlot); + autoInitPoint.addEventListener("change", toggleInitPointMode); yaw.addEventListener("change", updatePlots); pitch.addEventListener("change", updatePlots); generations.addEventListener("change", updatePlots); - yaw.addEventListener("input", updatePlots); - pitch.addEventListener("input", updatePlots); - generations.addEventListener("input", updatePlots); - - run.addEventListener("click", runExperiment) - window.addEventListener("resize", setupCanvas); + yaw.addEventListener("input", (e) => { + updateSliderValue(e.target, yawValue); + updatePlots(); + }); + pitch.addEventListener("input", (e) => { + updateSliderValue(e.target, pitchValue); + updatePlots(); + }); + generations.addEventListener("input", (e) => { + currentGen.innerText = e.target.value; + updatePlots(); + }); - // setup vertical tabs buttons - ['function', 'vrp'].forEach(function(type) { - document.getElementById(type + 'TabButton').addEventListener("click", function(evt) { - openTab(evt, 'controlTab', type + 'Tab', '-vert'); - }); + run.addEventListener("click", runExperiment); + window.addEventListener("resize", () => { + resizeAllCanvases(); + updatePlots(); }); // setup horizontal tab buttons @@ -74,7 +89,6 @@ function setupListeners() { }); // open default tabs - document.getElementById("functionTabButton").click(); document.getElementById("solutionTabButton").click(); // allow to control generation range using left-right arrows @@ -82,34 +96,129 @@ function setupListeners() { switch (event.key) { case "ArrowLeft": generations.value = Math.max(parseInt(generations.value) - 1, parseInt(generations.min)); + currentGen.innerText = generations.value; updatePlots(); break; case "ArrowRight": generations.value = Math.min(parseInt(generations.value) + 1, parseInt(generations.max)); + currentGen.innerText = generations.value; updatePlots(); break; } }); + + // Initialize slider values + updateSliderValue(pitch, pitchValue); + updateSliderValue(yaw, yawValue); + + // Set initial ranges for function + updateInitPointRanges(); } /** Setup canvas to properly handle high DPI and redraw current plot. 
*/ -function setupCanvas(canvas, size) { - if (canvas.style === undefined) { +function setupCanvas(canvas) { + if (!canvas || !canvas.style) { return; } - const aspectRatio = canvas.width / canvas.height; - //const size = canvas.parentNode.offsetWidth * 1.2; - canvas.style.width = size + "px"; - canvas.style.height = size / aspectRatio + "px"; - canvas.width = size; - canvas.height = size / aspectRatio; + const container = canvas.parentNode; + const containerWidth = container.offsetWidth - 20; // subtract padding + const originalAspectRatio = canvas.width / canvas.height; + + // Set display size (CSS pixels) + const displayWidth = Math.min(containerWidth, 950); + const displayHeight = displayWidth / originalAspectRatio; + + canvas.style.width = displayWidth + "px"; + canvas.style.height = displayHeight + "px"; +} + +/** Resize all canvases */ +function resizeAllCanvases() { + [solutionCanvas, searchCanvas, overallCanvas, bestCanvas, durationCanvas, fitnessCanvas].forEach(canvas => { + setupCanvas(canvas); + }); +} + +/** Update slider display value */ +function updateSliderValue(slider, display) { + const value = (Number(slider.value) / 100.0).toFixed(2); + display.innerText = value; +} + +/** Switch between Function and VRP benchmark types */ +function switchBenchmarkType() { + const type = benchmarkType.value; + if (type === 'function') { + functionControls.classList.remove('hide'); + vrpControls.classList.add('hide'); + } else { + functionControls.classList.add('hide'); + vrpControls.classList.remove('hide'); + + // Show message if no file loaded + if (!Chart.data) { + status.innerText = 'Please load a VRP problem file'; + status.classList.remove('success', 'loading'); + } + } + changePlot(); +} + +/** Toggle between automatic and manual initial point selection */ +function toggleInitPointMode() { + if (autoInitPoint.checked) { + manualPointControls.classList.add('hide'); + } else { + manualPointControls.classList.remove('hide'); + } +} + +/** Update initial point input ranges based on selected function */ +function updateInitPointRanges() { + const functionName = plotFunction.value; + let min, max; + + switch(functionName) { + case 'rosenbrock': + min = -2.0; max = 2.0; + break; + case 'rastrigin': + min = -5.12; max = 5.12; + break; + case 'himmelblau': + min = -5.0; max = 5.0; + break; + case 'ackley': + min = -5.0; max = 5.0; + break; + case 'matyas': + min = -10.0; max = 10.0; + break; + default: + min = -5.0; max = 5.0; + } + + initX.min = min; + initX.max = max; + initZ.min = min; + initZ.max = max; + + // Reset to center if out of range + if (parseFloat(initX.value) < min || parseFloat(initX.value) > max) { + initX.value = 0; + } + if (parseFloat(initZ.value) < min || parseFloat(initZ.value) > max) { + initZ.value = 0; + } } /** Changes plot **/ function changePlot() { Chart.clear(); - generations.classList.add("hide"); + generationControl.classList.add("hide"); + currentGen.innerText = "0"; + updateInitPointRanges(); updatePlots(); } @@ -123,6 +232,11 @@ function openFile(event) { Chart.data = content; run.classList.remove("hide"); + + // Update status to show file loaded + status.innerText = `✓ File loaded: ${input.files[0].name}`; + status.classList.add('success'); + status.classList.remove('loading'); }; reader.readAsText(input.files[0]); } @@ -139,10 +253,8 @@ function updateDynamicPlots(run) { let population_type = plotPopulation.selectedOptions[0].value; let heuristic_kind = "best"; - coordLabel.innerText = `Rotation: pitch=${pitch_value}, yaw=${yaw_value}` - - // TODO 
configure parameters from outside - let max_gen = 2000 + // Get max generations from user input + let max_gen = parseInt(maxGenerations.value); const start = performance.now(); switch (getExperimentType()) { @@ -172,29 +284,36 @@ function updateDynamicPlots(run) { if (run) { let function_name = plotFunction.selectedOptions[0].value; var x = 0.0, z = 0.0; - switch(function_name) { - case 'rosenbrock': - x = getRandomInRange(-2.0, 2.0) - z = getRandomInRange(-2.0, 2.0) - break; - case 'rastrigin': - x = getRandomInRange(-5.12, 5.12) - z = getRandomInRange(-5.12, 5.12) - break; - case 'himmelblau': - x = getRandomInRange(-5.0, 5.0) - z = getRandomInRange(-5.0, 5.0) - break; - case 'ackley': - x = getRandomInRange(-5.0, 5.0) - z = getRandomInRange(-5.0, 5.0) - break; - case 'matyas': - x = getRandomInRange(-10.0, 10.0) - z = getRandomInRange(-10.0, 10.0) - break; - default: - break; + + // Use manual point if checkbox is unchecked, otherwise random + if (!autoInitPoint.checked) { + x = parseFloat(initX.value); + z = parseFloat(initZ.value); + } else { + switch(function_name) { + case 'rosenbrock': + x = getRandomInRange(-2.0, 2.0) + z = getRandomInRange(-2.0, 2.0) + break; + case 'rastrigin': + x = getRandomInRange(-5.12, 5.12) + z = getRandomInRange(-5.12, 5.12) + break; + case 'himmelblau': + x = getRandomInRange(-5.0, 5.0) + z = getRandomInRange(-5.0, 5.0) + break; + case 'ackley': + x = getRandomInRange(-5.0, 5.0) + z = getRandomInRange(-5.0, 5.0) + break; + case 'matyas': + x = getRandomInRange(-10.0, 10.0) + z = getRandomInRange(-10.0, 10.0) + break; + default: + break; + } } console.log(`init point is: (${x}, ${z})`) @@ -213,24 +332,35 @@ function updateDynamicPlots(run) { } } - Chart.vrp(solutionCanvas, generation_value, pitch_value, yaw_value); + // Only render if data has been loaded + if (Chart.data) { + Chart.vrp(solutionCanvas, generation_value, pitch_value, yaw_value); + } break; } } - Chart.search_iteration(searchCanvas, generation_value, heuristic_kind); - Chart.search_best_statistics(bestCanvas, generation_value, heuristic_kind); - Chart.search_duration_statistics(durationCanvas, generation_value, heuristic_kind); - Chart.search_overall_statistics(overallCanvas, generation_value, heuristic_kind); + // Only render statistics if there's data to display + if (Chart.data || getExperimentType() === 'function') { + Chart.search_iteration(searchCanvas, generation_value, heuristic_kind); + Chart.search_best_statistics(bestCanvas, generation_value, heuristic_kind); + Chart.search_duration_statistics(durationCanvas, generation_value, heuristic_kind); + Chart.search_overall_statistics(overallCanvas, generation_value, heuristic_kind); + } const end = performance.now(); if (run) { generations.max = max_gen; - generations.classList.remove("hide"); + maxGen.innerText = max_gen; + currentGen.innerText = "0"; + generations.value = "0"; + generationControl.classList.remove("hide"); } - status.innerText = `Generation: ${generation_value} rendered in ${Math.ceil(end - start)}ms`; + status.innerText = `Generation: ${generation_value} | Rendered in ${Math.ceil(end - start)}ms`; + status.classList.remove('loading'); + status.classList.add('success'); } function updateStaticPlots() { @@ -239,15 +369,29 @@ function updateStaticPlots() { Chart.fitness_func(fitnessCanvas); break; case 'vrp': - Chart.fitness_vrp(fitnessCanvas) + // Only render if data has been loaded + if (Chart.data) { + Chart.fitness_vrp(fitnessCanvas); + } break; } } /** Runs experiment. 
*/ function runExperiment() { - updateDynamicPlots(true); - updateStaticPlots(true); + run.disabled = true; + run.innerHTML = '⏳ Running...'; + status.innerText = 'Running experiment...'; + status.classList.add('loading'); + status.classList.remove('success'); + + // Use setTimeout to allow UI to update + setTimeout(() => { + updateDynamicPlots(true); + updateStaticPlots(true); + run.disabled = false; + run.innerHTML = '▶ Run Experiment'; + }, 50); } function updatePlots() { @@ -256,24 +400,7 @@ function updatePlots() { } function getExperimentType() { - const buttons = document.querySelectorAll('.tablinks-vert'); - for (const button of buttons) { - if (button.classList.contains('active')) { - switch (button.textContent) { - case 'Function Bench': - return 'function' - case 'VRP Bench': - return 'vrp' - default: - console.error("unknown experiment type: '" + button.textContent + "'"); - return 'function' - } - } - } - - console.error("no active tab detected"); - - return 'function'; + return benchmarkType.value; } function openTab(evt, containerId, tabId, suffix) { diff --git a/experiments/heuristic-research/www/style.css b/experiments/heuristic-research/www/style.css index b22d5cf22..f3e4fa2e7 100644 --- a/experiments/heuristic-research/www/style.css +++ b/experiments/heuristic-research/www/style.css @@ -1,20 +1,44 @@ +/* CSS Variables for easy theming */ +:root { + --primary-color: #0366d6; + --primary-hover: #0256c5; + --secondary-color: #586069; + --background: #ffffff; + --surface: #f6f8fa; + --border: #e1e4e8; + --text-primary: #24292e; + --text-secondary: #586069; + --success: #28a745; + --warning: #ffa500; + --shadow: rgba(0, 0, 0, 0.1); + --transition: all 0.3s ease; +} + html, body, main { width: 100%; margin: 0; padding: 0; } +* { + box-sizing: border-box; +} + body { margin: auto; - max-width: 840px; + max-width: 1000px; display: flex; flex-direction: column; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; + color: var(--text-primary); + background-color: var(--background); + line-height: 1.6; + padding: 10px; } -@media (max-width: 800px) { +@media (max-width: 768px) { body { - padding: 10px; - box-sizing: border-box; + padding: 5px; } } @@ -22,187 +46,389 @@ main { display: flex; flex-direction: column; align-items: center; + padding: 0; +} + +h1 { + font-size: 1.5rem; + font-weight: 600; + margin: 10px 0 15px 0; + color: var(--text-primary); + text-align: center; } footer { - margin-top: 2em; + margin-top: 1.5em; + padding: 1em 0; font-size: 12px; text-align: center; + border-top: 1px solid var(--border); + color: var(--text-secondary); +} + +footer a { + color: var(--primary-color); + text-decoration: none; + margin: 0 8px; + transition: var(--transition); +} + +footer a:hover { + text-decoration: underline; } label, #coordLabel, #status { - color: grey; - font-size: 10px; - height: 15px + color: var(--text-secondary); + font-size: 11px; + font-weight: 500; + height: auto; + margin-bottom: 4px; + display: block; +} + +/* Control panels */ +.controls-panel { + display: flex; + flex-direction: column; + gap: 8px; +} + +.control-group { + margin-bottom: 0; +} + +.control-group label { + display: flex; + align-items: center; +} + +.control-row { + display: flex; + gap: 10px; +} + +.flex-1 { + flex: 1; +} + +.benchmark-controls { + display: flex; + flex-direction: column; + gap: 8px; +} + +input[type="checkbox"] { + width: 16px; + height: 16px; + cursor: pointer; + accent-color: var(--primary-color); +} + 
+input[type="number"] { + font-family: inherit; + color: var(--text-primary); +} + +input[type="number"]:focus { + outline: none; + border-color: var(--primary-color) !important; + box-shadow: 0 0 0 3px rgba(3, 102, 214, 0.1); +} + +.action-panel { + justify-content: flex-start; + min-width: 200px; + gap: 10px; +} + +.run-button { + flex-shrink: 0; +} + +.status-box { + flex-shrink: 0; } #run { display: block; - margin-right: auto; - margin-left: auto; + margin: 0 auto; } .hide { - visibility: hidden; - height: 0px; + display: none !important; } .row { display: flex; - width: 840px; + width: 100%; + gap: 10px; + margin-bottom: 10px; +} + +@media (max-width: 768px) { + .row { + flex-direction: column; + gap: 8px; + } } .column { - flex: 50%; - padding: 8px; + flex: 1; + padding: 12px; + background: var(--surface); + border-radius: 6px; + border: 1px solid var(--border); } .block { display: block; width: 100%; border: none; - background-color: #00b7e4; - padding: 14px 28px; - font-size: 16px; + background-color: var(--primary-color); + color: white; + padding: 12px 24px; + font-size: 15px; + font-weight: 600; cursor: pointer; text-align: center; + border-radius: 6px; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -/* Change background color of buttons on hover */ .block:hover { - background-color: #0ecfff; + background-color: var(--primary-hover); + box-shadow: 0 2px 6px var(--shadow); + transform: translateY(-1px); +} + +.block:active { + transform: translateY(0); + box-shadow: 0 1px 2px var(--shadow); } -* {box-sizing: border-box} +.block:disabled { + opacity: 0.6; + cursor: not-allowed; + transform: none; +} /* Horizontal Tabs */ .tab { - overflow: hidden; - border: 1px solid #ccc; - background-color: #f1f1f1; + display: flex; + flex-wrap: wrap; + gap: 3px; + background-color: var(--surface); + padding: 6px; + border-radius: 6px 6px 0 0; + border: 1px solid var(--border); + border-bottom: none; } -/* Style the buttons that are used to open the tab content */ .tab button { - background-color: inherit; - float: left; + background-color: transparent; border: none; outline: none; cursor: pointer; - padding: 14px 16px; - transition: 0.3s; + padding: 8px 12px; + font-size: 12px; + font-weight: 500; + color: var(--text-secondary); + border-radius: 4px; + transition: var(--transition); + white-space: nowrap; } -/* Change background color of buttons on hover */ .tab button:hover { - background-color: #ddd; + background-color: rgba(3, 102, 214, 0.1); + color: var(--primary-color); } -/* Create an active/current tablink class */ .tab button.active { - background-color: #ccc; + background-color: white; + color: var(--primary-color); + box-shadow: 0 1px 3px var(--shadow); } -/* Style the tab content */ .tabcontent { display: none; - padding: 6px 12px; - border: 1px solid #ccc; + padding: 10px; + background: white; + border: 1px solid var(--border); border-top: none; + border-radius: 0 0 6px 6px; +} + +.tabcontent canvas { + display: block; + margin: 0 auto; + max-width: 100%; + width: 100%; + height: auto; + border-radius: 4px; } -/* Vertical Tab */ +/* Select */ +.select-wrap { + border: 1px solid var(--border); + border-radius: 6px; + padding: 6px 10px; + width: 100%; + background-color: white; + position: relative; + margin-bottom: 10px; + transition: var(--transition); +} -.tab-vert { - float: left; - border: 1px solid #ccc; - background-color: #f1f1f1; - width: 30%; - height: 200px; +.select-wrap:focus-within { + border-color: var(--primary-color); + box-shadow: 0 0 0 
3px rgba(3, 102, 214, 0.1); } -/* Style the buttons that are used to open the tab content */ -.tab-vert button { - display: block; - background-color: inherit; - color: black; - padding: 22px 16px; +.select-wrap label { + font-size: 9px; + text-transform: uppercase; + color: var(--text-secondary); + padding: 0; + position: absolute; + top: 6px; + left: 10px; + font-weight: 600; + letter-spacing: 0.5px; +} + +select { + background-color: transparent; + border: 0; + height: 40px; + font-size: 14px; + color: var(--text-primary); width: 100%; - border: none; + padding-top: 14px; + cursor: pointer; outline: none; - text-align: left; +} + +select option { + padding: 10px; +} + +/* File uploader */ +.input-container { + border: 1px solid var(--border); + border-radius: 6px; + overflow: hidden; + background: white; +} + +input[type=file] { + width: 100%; + font-size: 12px; + color: var(--text-primary); +} + +input[type=file]::file-selector-button { + background-color: var(--primary-color); + color: white; + border: 0; + border-right: 1px solid var(--border); + padding: 8px 12px; + margin-right: 10px; + font-weight: 600; + font-size: 12px; cursor: pointer; - transition: 0.3s; + transition: var(--transition); } -/* Change background color of buttons on hover */ -.tab-vert button:hover { - background-color: #ddd; +input[type=file]::file-selector-button:hover { + background-color: var(--primary-hover); } -/* Create an active/current "tab button" class */ -.tab-vert button.active { - background-color: #ccc; +/* Range inputs (sliders) */ +input[type=range] { + width: 100%; + height: 4px; + border-radius: 2px; + background: var(--border); + outline: none; + margin: 4px 0 0 0; + -webkit-appearance: none; } -/* Style the tab content */ -.tabcontent-vert { - padding: 8px; - float: left; - border: 1px solid #ccc; - width: 70%; - border-left: none; - height: 200px; +input[type=range]::-webkit-slider-thumb { + -webkit-appearance: none; + appearance: none; + width: 14px; + height: 14px; + border-radius: 50%; + background: var(--primary-color); + cursor: pointer; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -#commonControls { - padding: 16px; +input[type=range]::-webkit-slider-thumb:hover { + background: var(--primary-hover); + transform: scale(1.1); } -/* Select */ -.select-wrap { - border: 1px solid #777; - border-radius: 4px; - padding: 0 5px; - width: 140px; - background-color:#fff; - position:relative; +input[type=range]::-moz-range-thumb { + width: 14px; + height: 14px; + border-radius: 50%; + background: var(--primary-color); + cursor: pointer; + border: none; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -.select-wrap label{ - font-size:8px; - text-transform: uppercase; - color: #777; - padding: 0 8px; - position: absolute; - top:6px; + +input[type=range]::-moz-range-thumb:hover { + background: var(--primary-hover); + transform: scale(1.1); } -select{ - background-color: #fff; - border:0px; - height:50px; - font-size: 16px; +/* Loading spinner */ +.spinner { + display: inline-block; + width: 16px; + height: 16px; + border: 2px solid rgba(255, 255, 255, 0.3); + border-radius: 50%; + border-top-color: white; + animation: spin 0.8s linear infinite; + margin-left: 8px; + vertical-align: middle; } -/* file uploader */ +@keyframes spin { + to { transform: rotate(360deg); } +} -.input-container { - border: 1px solid #e5e5e5; +/* Status indicator */ +#status { + padding: 8px 12px; + background: var(--surface); + border-radius: 4px; + text-align: center; + 
margin: 0; + font-size: 11px; + border: 1px solid var(--border); } -input[type=file]::file-selector-button { - background-color: #00b7e4; - color: #000; - border: 0px; - border-right: 1px solid #e5e5e5; - padding: 10px 15px; - margin-right: 20px; - transition: .5s; +#status.loading { + color: var(--warning); + border-color: var(--warning); } -input[type=file]::file-selector-button:hover { - background-color: #0ecfff; - border: 0px; - border-right: 1px solid #e5e5e5; +#status.success { + color: var(--success); + border-color: var(--success); +} + +/* Canvas container */ +#canvasTab { + width: 100%; + margin-top: 10px; } \ No newline at end of file From 07bad8a22b7a41abbf1ee73f41cec199e7f07b7f Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 13:22:30 +0100 Subject: [PATCH 20/30] Support loading of raw experimental output for dynamic heuristic --- experiments/heuristic-research/src/lib.rs | 1 + .../heuristic-research/src/plots/mod.rs | 26 +++++- .../heuristic-research/src/solver/proxies.rs | 25 ++++++ .../heuristic-research/src/solver/state.rs | 51 ++++++------ .../src/solver/vrp/population.rs | 6 +- experiments/heuristic-research/www/index.js | 9 +++ rosomaxa/src/hyper/dynamic_selective.rs | 79 ++++++++++++++++--- 7 files changed, 155 insertions(+), 42 deletions(-) diff --git a/experiments/heuristic-research/src/lib.rs b/experiments/heuristic-research/src/lib.rs index f92a15ace..8e305e557 100644 --- a/experiments/heuristic-research/src/lib.rs +++ b/experiments/heuristic-research/src/lib.rs @@ -126,6 +126,7 @@ pub fn run_vrp_experiment(format_type: &str, problem: &str, population_type: &st /// Loads experiment data from json serialized representation. #[wasm_bindgen] pub fn load_state(data: &str) -> usize { + set_panic_hook_once(); match ExperimentData::try_from(data) { Ok(data) => *EXPERIMENT_DATA.lock().unwrap() = data, Err(err) => web_sys::console::log_1(&err.into()), diff --git a/experiments/heuristic-research/src/plots/mod.rs b/experiments/heuristic-research/src/plots/mod.rs index 7d82726d9..33c35223b 100644 --- a/experiments/heuristic-research/src/plots/mod.rs +++ b/experiments/heuristic-research/src/plots/mod.rs @@ -247,7 +247,9 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let names_rev = data.heuristic_state.names.iter().map(|(k, v)| (*v, k)).collect::>(); let _states_rev = data.heuristic_state.states.iter().map(|(k, v)| (*v, k)).collect::>(); - data.heuristic_state.search_states.get(&generation).map(|states| { + // Find nearest available generation if exact match doesn't exist + let nearest_gen = find_nearest_generation(&data.heuristic_state.search_states, generation); + data.heuristic_state.search_states.get(&nearest_gen).map(|states| { let estimations = states .iter() // NOTE: just show all transitions @@ -264,7 +266,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { get_search_statistics( &data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |SearchResult(_, _, (_, to_state_idx), _)| to_state_idx == best_state_idx, |acc, SearchResult(name_idx, ..)| { acc[*name_idx] += 1; @@ -276,7 +278,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let overall = get_search_statistics( &data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |_| true, |acc, SearchResult(name_idx, ..)| { acc[*name_idx] += 1; @@ -287,7 +289,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let durations = get_search_statistics( 
&data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |_| true, |acc: &mut Vec<(usize, usize)>, SearchResult(name_idx, _, _, duration)| { let (total, count) = (acc[*name_idx].0, acc[*name_idx].1); @@ -305,6 +307,22 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { .unwrap_or_default() } +/// Finds the nearest available generation that is <= the requested generation. +/// Returns the requested generation if it exists, otherwise the closest smaller one. +fn find_nearest_generation(generations: &HashMap>, requested: usize) -> usize { + if generations.contains_key(&requested) { + return requested; + } + + // Find the largest generation that is smaller than requested + generations + .keys() + .filter(|&&g| g <= requested) + .copied() + .max() + .unwrap_or(requested) +} + fn get_search_statistics( search_states: &HashMap>, names_rev: &HashMap, diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index fd705b06f..197c2d3fd 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -41,6 +41,31 @@ impl<'a> TryFrom<&'a str> for ExperimentData { type Error = String; fn try_from(value: &'a str) -> Result { + // Check if this is telemetry CSV format (contains "TELEMETRY" somewhere in the content) + // Extract telemetry section if present, otherwise try JSON + if let Some(telemetry_start) = value.find("TELEMETRY") { + // Extract everything from TELEMETRY onwards + let telemetry_content = &value[telemetry_start..]; + + // Parse telemetry CSV using existing parser + let heuristic_state = HyperHeuristicState::try_parse_all(telemetry_content) + .ok_or_else(|| "Failed to parse telemetry data".to_string())?; + + // Find max generation from telemetry data + let max_generation = heuristic_state.search_states.keys() + .chain(heuristic_state.heuristic_states.keys()) + .copied() + .max() + .unwrap_or(0); + + let mut experiment_data = ExperimentData::default(); + experiment_data.heuristic_state = heuristic_state; + experiment_data.generation = max_generation; + + return Ok(experiment_data); + } + + // Try parsing as JSON serde_json::from_str(value).map_err(|err| format!("cannot deserialize experiment data: {err}")) } } diff --git a/experiments/heuristic-research/src/solver/state.rs b/experiments/heuristic-research/src/solver/state.rs index fdb68f9cc..f2eff096d 100644 --- a/experiments/heuristic-research/src/solver/state.rs +++ b/experiments/heuristic-research/src/solver/state.rs @@ -172,31 +172,32 @@ impl HyperHeuristicState { .values_mut() .for_each(|states| states.sort_by(|SearchResult(a, ..), SearchResult(b, ..)| a.cmp(b))); - let mut heuristic_states = data.lines().skip_while(|line| *line != "heuristic:").skip(2).fold( - HashMap::<_, Vec<_>>::new(), - |mut data, line| { - let fields: Vec = line.split(',').map(|s| s.to_string()).collect(); - - let generation: usize = fields[0].parse().unwrap(); - let state = fields[1].clone(); - let name = fields[2].clone(); - let alpha = fields[3].parse().unwrap(); - let beta = fields[4].parse().unwrap(); - let mu = fields[5].parse().unwrap(); - let v = fields[6].parse().unwrap(); - let n = fields[7].parse().unwrap(); - - insert_to_map(&mut states, state.clone()); - insert_to_map(&mut names, name.clone()); - - let state = states.get(&state).copied().unwrap(); - let name = names.get(&name).copied().unwrap(); - - data.entry(generation).or_default().push(HeuristicResult(state, name, alpha, beta, 
mu, v, n)); - - data - }, - ); + let mut heuristic_states = + data.lines().skip_while(|line| *line != "heuristic:").skip(2).take_while(|line| !line.is_empty()).fold( + HashMap::<_, Vec<_>>::new(), + |mut data, line| { + let fields: Vec = line.split(',').map(|s| s.to_string()).collect(); + + let generation: usize = fields[0].parse().unwrap(); + let state = fields[1].clone(); + let name = fields[2].clone(); + let alpha = fields[3].parse().unwrap(); + let beta = fields[4].parse().unwrap(); + let mu = fields[5].parse().unwrap(); + let v = fields[6].parse().unwrap(); + let n = fields[7].parse().unwrap(); + + insert_to_map(&mut states, state.clone()); + insert_to_map(&mut names, name.clone()); + + let state = states.get(&state).copied().unwrap(); + let name = names.get(&name).copied().unwrap(); + + data.entry(generation).or_default().push(HeuristicResult(state, name, alpha, beta, mu, v, n)); + + data + }, + ); heuristic_states .values_mut() .for_each(|states| states.sort_by(|HeuristicResult(_, a, ..), HeuristicResult(_, b, ..)| a.cmp(b))); diff --git a/experiments/heuristic-research/src/solver/vrp/population.rs b/experiments/heuristic-research/src/solver/vrp/population.rs index c14f5c663..ee485d4a8 100644 --- a/experiments/heuristic-research/src/solver/vrp/population.rs +++ b/experiments/heuristic-research/src/solver/vrp/population.rs @@ -10,7 +10,7 @@ pub fn get_population_fitness_fn(generation: usize) -> FitnessFn { .lock() .ok() .and_then(|data| data.on_generation.get(&generation).map(|(footprint, _)| footprint.clone())) - .map(|footprint| { + .map(|footprint| -> FitnessFn { Arc::new(move |input: &[Float]| { if let &[from, to] = input { footprint.get(from as usize, to as usize) as Float @@ -19,7 +19,7 @@ pub fn get_population_fitness_fn(generation: usize) -> FitnessFn { } }) }) - .expect("cannot get data from EXPERIMENT_DATA") + .unwrap_or_else(|| Arc::new(|_: &[Float]| 0.0) as FitnessFn) } /// Returns a description of population state. 
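// [editor's note — illustrative sketch, not part of the patch] The `take_while` guard added
// to the "heuristic:" section parser earlier in this patch is what keeps the fold from
// running past the end of its CSV section when raw telemetry output carries trailing data.
// A minimal standalone version of that section-scoped iteration (the function name is
// hypothetical):
fn heuristic_rows(data: &str) -> impl Iterator<Item = &str> + '_ {
    data.lines()
        .skip_while(|line| *line != "heuristic:") // jump to the section header
        .skip(2) // skip the header line and the CSV column-name line
        .take_while(|line| !line.is_empty()) // stop at the blank line ending the section
}
// Without the final `take_while`, lines belonging to whatever follows the section would be
// fed into the per-row field parsing, where `fields[i].parse().unwrap()` would panic.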
@@ -32,5 +32,5 @@ pub fn get_population_desc(generation: usize) -> String { format!("total [{}], known edges [{}]", individuals.len(), footprint.desc()) }) }) - .expect("cannot get data from EXPERIMENT_DATA") + .unwrap_or_else(|| String::from("no data")) } diff --git a/experiments/heuristic-research/www/index.js b/experiments/heuristic-research/www/index.js index a1dcab44f..be706b17b 100644 --- a/experiments/heuristic-research/www/index.js +++ b/experiments/heuristic-research/www/index.js @@ -325,6 +325,15 @@ function updateDynamicPlots(run) { case 'vrp': { if (run) { let format_type = vrpFormat.selectedOptions[0].value; + + // Check if data has been loaded + if (!Chart.data) { + status.innerText = '⚠ Please load a VRP file first'; + status.classList.remove('success'); + status.classList.add('loading'); + return; + } + if (format_type === "state") { max_gen = Chart.load_state(Chart.data); } else { diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index f3a593f06..9b1bfadf6 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -642,23 +642,82 @@ where return Ok(()); } + // To avoid WASM memory issues, downsample telemetry while preserving analysis capability + // Strategy: keep early samples, sample periodically, and keep recent samples + const MAX_SAMPLES: usize = 5000; + const EARLY_SAMPLES: usize = 500; + const RECENT_SAMPLES: usize = 500; + f.write_fmt(format_args!("TELEMETRY\n"))?; f.write_fmt(format_args!("search:\n"))?; f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; - for (generation, sample) in self.agent.tracker.search_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{}\n", - sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration - ))?; + + let search_total = self.agent.tracker.search_telemetry.len(); + if search_total <= MAX_SAMPLES { + // Small enough, output all + for (generation, sample) in self.agent.tracker.search_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration + ))?; + } + } else { + // Downsample: early + periodic middle + recent + let middle_samples = MAX_SAMPLES - EARLY_SAMPLES - RECENT_SAMPLES; + let middle_start = EARLY_SAMPLES; + let middle_end = search_total - RECENT_SAMPLES; + let step = (middle_end - middle_start) / middle_samples; + + for (i, (generation, sample)) in self.agent.tracker.search_telemetry.iter().enumerate() { + let include = i < EARLY_SAMPLES + || i >= search_total - RECENT_SAMPLES + || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + + if include { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, + generation, + sample.reward, + sample.transition.0, + sample.transition.1, + sample.duration + ))?; + } + } } f.write_fmt(format_args!("heuristic:\n"))?; f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; - for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{},{},{}\n", - generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n - ))?; + + let heuristic_total = self.agent.tracker.heuristic_telemetry.len(); + if heuristic_total <= MAX_SAMPLES { + // Small enough, output all + for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { + f.write_fmt(format_args!( + 
"{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + } else { + // Downsample: early + periodic middle + recent + let middle_samples = MAX_SAMPLES - EARLY_SAMPLES - RECENT_SAMPLES; + let middle_start = EARLY_SAMPLES; + let middle_end = heuristic_total - RECENT_SAMPLES; + let step = (middle_end - middle_start) / middle_samples; + + for (i, (generation, sample)) in self.agent.tracker.heuristic_telemetry.iter().enumerate() { + let include = i < EARLY_SAMPLES + || i >= heuristic_total - RECENT_SAMPLES + || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + + if include { + f.write_fmt(format_args!( + "{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + } } Ok(()) From 725aa17519f94af300dfb408bff2023f88c5a790 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 20:49:08 +0100 Subject: [PATCH 21/30] Improve a bit code --- experiments/heuristic-research/src/plots/mod.rs | 9 ++------- .../heuristic-research/src/solver/proxies.rs | 14 ++++++++------ vrp-core/src/solver/heuristic.rs | 8 ++++---- vrp-core/src/solver/search/decompose_search.rs | 2 +- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/experiments/heuristic-research/src/plots/mod.rs b/experiments/heuristic-research/src/plots/mod.rs index 33c35223b..b24cb5b09 100644 --- a/experiments/heuristic-research/src/plots/mod.rs +++ b/experiments/heuristic-research/src/plots/mod.rs @@ -313,14 +313,9 @@ fn find_nearest_generation(generations: &HashMap>, requ if generations.contains_key(&requested) { return requested; } - + // Find the largest generation that is smaller than requested - generations - .keys() - .filter(|&&g| g <= requested) - .copied() - .max() - .unwrap_or(requested) + generations.keys().filter(|&&g| g <= requested).copied().max().unwrap_or(requested) } fn get_search_statistics( diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index 197c2d3fd..abc17a6f6 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -46,25 +46,27 @@ impl<'a> TryFrom<&'a str> for ExperimentData { if let Some(telemetry_start) = value.find("TELEMETRY") { // Extract everything from TELEMETRY onwards let telemetry_content = &value[telemetry_start..]; - + // Parse telemetry CSV using existing parser let heuristic_state = HyperHeuristicState::try_parse_all(telemetry_content) .ok_or_else(|| "Failed to parse telemetry data".to_string())?; - + // Find max generation from telemetry data - let max_generation = heuristic_state.search_states.keys() + let max_generation = heuristic_state + .search_states + .keys() .chain(heuristic_state.heuristic_states.keys()) .copied() .max() .unwrap_or(0); - + let mut experiment_data = ExperimentData::default(); experiment_data.heuristic_state = heuristic_state; experiment_data.generation = max_generation; - + return Ok(experiment_data); } - + // Try parsing as JSON serde_json::from_str(value).map_err(|err| format!("cannot deserialize experiment data: {err}")) } diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index cc8c544f5..426286b47 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -574,22 +574,22 @@ mod dynamic { "local_reschedule_departure".to_string(), 1., ), - 
(Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 5.), + (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 3.), ( Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new( environment.random.clone(), SINGLE_HEURISTIC_QUOTA_LIMIT, )))), "local_swap_star".to_string(), - 10., + 5., ), // decompose search methods with different inner heuristic ( create_variable_search_decompose_search(problem.clone(), environment.clone()), "decompose_search_var".to_string(), - 25., + 10., ), - (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 25.), + (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 10.), ] } diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 6855ac16d..7aa6e68e0 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -118,7 +118,7 @@ fn create_multiple_insertion_contexts( let route_groups = group_routes_by_proximity(insertion_ctx); let (min, max) = max_routes_range; - let max = if insertion_ctx.solution.routes.len() < 4 { 2 } else { max }; + let max = if insertion_ctx.solution.routes.len() < max as usize { (max / 2).max(min) } else { max }; // identify route groups and create contexts from them let used_indices = RefCell::new(HashSet::new()); From d9446e8511121596a1a5f73eb65584e279ffad2e Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 20:55:09 +0100 Subject: [PATCH 22/30] Apply some refactorings and fix clippy warning --- .../heuristic-research/src/solver/proxies.rs | 18 +++++++-------- rosomaxa/src/algorithms/gsom/contraction.rs | 2 +- rosomaxa/src/algorithms/gsom/network.rs | 9 ++++++-- rosomaxa/src/algorithms/gsom/state.rs | 2 +- rosomaxa/src/hyper/dynamic_selective.rs | 4 ++-- rosomaxa/src/population/elitism.rs | 9 +++++++- rosomaxa/src/population/greedy.rs | 9 +++++++- rosomaxa/src/population/mod.rs | 9 ++++++-- rosomaxa/src/population/rosomaxa.rs | 23 ++++++++++++++----- .../unit/algorithms/gsom/network_test.rs | 6 ++--- rosomaxa/tests/unit/population/greedy_test.rs | 2 +- 11 files changed, 64 insertions(+), 29 deletions(-) diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index abc17a6f6..9601b958a 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -52,7 +52,7 @@ impl<'a> TryFrom<&'a str> for ExperimentData { .ok_or_else(|| "Failed to parse telemetry data".to_string())?; // Find max generation from telemetry data - let max_generation = heuristic_state + let generation = heuristic_state .search_states .keys() .chain(heuristic_state.heuristic_states.keys()) @@ -60,11 +60,7 @@ impl<'a> TryFrom<&'a str> for ExperimentData { .max() .unwrap_or(0); - let mut experiment_data = ExperimentData::default(); - experiment_data.heuristic_state = heuristic_state; - experiment_data.generation = max_generation; - - return Ok(experiment_data); + return Ok(ExperimentData { heuristic_state, generation, ..Default::default() }); } // Try parsing as JSON @@ -156,7 +152,7 @@ where self.generation = statistics.generation; self.acquire().generation = statistics.generation; - let individuals_data = self.inner.all().map(|individual| individual.into()).collect::>(); + let individuals_data = self.inner.iter().map(|individual| individual.into()).collect::>(); // NOTE this is 
not exactly how footprint is calculated in production code. // In the production version, approximation of the footprint is used to avoid iterating over // all individuals in the population on generation update. @@ -191,8 +187,12 @@ where self.inner.ranked() } - fn all(&self) -> Box + '_> { - self.inner.all() + fn iter(&self) -> Box + '_> { + self.inner.iter() + } + + fn into_iter(self) -> Box> { + self.inner.into_iter() } fn size(&self) -> usize { diff --git a/rosomaxa/src/algorithms/gsom/contraction.rs b/rosomaxa/src/algorithms/gsom/contraction.rs index ab3c2133f..e94884310 100644 --- a/rosomaxa/src/algorithms/gsom/contraction.rs +++ b/rosomaxa/src/algorithms/gsom/contraction.rs @@ -27,7 +27,7 @@ where // find nodes which should be removed let removed = network - .get_nodes() + .iter_nodes() .map(|node| node.coordinate) .filter(|coord| coord.0 % x_decim == 0 || coord.1 % y_decim == 0) .collect::>(); diff --git a/rosomaxa/src/algorithms/gsom/network.rs b/rosomaxa/src/algorithms/gsom/network.rs index 79b63d774..11bb4ce7d 100644 --- a/rosomaxa/src/algorithms/gsom/network.rs +++ b/rosomaxa/src/algorithms/gsom/network.rs @@ -210,7 +210,7 @@ where } /// Return nodes in arbitrary order. - pub fn get_nodes(&self) -> impl Iterator> + '_ { + pub fn iter_nodes(&self) -> impl Iterator> + '_ { self.nodes.values() } @@ -219,6 +219,11 @@ where self.nodes.iter() } + /// Iterates over coordinates and their nodes. + pub fn into_iter(self) -> impl Iterator)> { + self.nodes.into_iter() + } + /// Returns a total amount of nodes. pub fn size(&self) -> usize { self.nodes.len() @@ -242,7 +247,7 @@ where /// Returns max unified distance of the network. pub fn max_unified_distance(&self) -> Float { - self.get_nodes().map(|node| node.unified_distance(self, 1)).max_by(|a, b| a.total_cmp(b)).unwrap_or_default() + self.iter_nodes().map(|node| node.unified_distance(self, 1)).max_by(|a, b| a.total_cmp(b)).unwrap_or_default() } /// Performs training loop multiple times. 
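// [editor's note — illustrative sketch, not part of the patch] The refactoring in this patch
// applies one convention across the GSOM network and the population types: `iter` borrows,
// while a separate consuming variant hands out owned items, so callers can drain a
// population without cloning individuals. Simplified stand-in types:
struct Population<S> {
    individuals: Vec<S>,
}

impl<S> Population<S> {
    /// Borrowing traversal in arbitrary order.
    fn iter(&self) -> impl Iterator<Item = &S> + '_ {
        self.individuals.iter()
    }

    /// Consuming traversal: transfers ownership of each individual to the caller.
    fn into_iter(self) -> impl Iterator<Item = S> {
        self.individuals.into_iter()
    }
}
// A later patch in this series switches the trait version of the consuming method to a
// `self: Box<Self>` receiver so it remains callable on boxed `dyn HeuristicPopulation`
// objects.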
diff --git a/rosomaxa/src/algorithms/gsom/state.rs b/rosomaxa/src/algorithms/gsom/state.rs index 9e7414efa..415cd7928 100644 --- a/rosomaxa/src/algorithms/gsom/state.rs +++ b/rosomaxa/src/algorithms/gsom/state.rs @@ -48,7 +48,7 @@ where let mse = network.mse(); let nodes = network - .get_nodes() + .iter_nodes() .map(|node| { let mut dump = String::new(); write!(dump, "{}", node.storage).unwrap(); diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 9b1bfadf6..e821555d9 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -671,7 +671,7 @@ where for (i, (generation, sample)) in self.agent.tracker.search_telemetry.iter().enumerate() { let include = i < EARLY_SAMPLES || i >= search_total - RECENT_SAMPLES - || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + || (i >= middle_start && i < middle_end && (i - middle_start).is_multiple_of(step)); if include { f.write_fmt(format_args!( @@ -709,7 +709,7 @@ where for (i, (generation, sample)) in self.agent.tracker.heuristic_telemetry.iter().enumerate() { let include = i < EARLY_SAMPLES || i >= heuristic_total - RECENT_SAMPLES - || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + || (i >= middle_start && i < middle_end && (i - middle_start).is_multiple_of(step)); if include { f.write_fmt(format_args!( diff --git a/rosomaxa/src/population/elitism.rs b/rosomaxa/src/population/elitism.rs index 70bfdeca3..f94de862c 100644 --- a/rosomaxa/src/population/elitism.rs +++ b/rosomaxa/src/population/elitism.rs @@ -92,10 +92,17 @@ where Box::new(self.individuals.iter()) } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { Box::new(self.individuals.iter()) } + fn into_iter(self) -> Box> + where + Self::Individual: 'static, + { + Box::new(self.individuals.into_iter()) + } + fn size(&self) -> usize { self.individuals.len() } diff --git a/rosomaxa/src/population/greedy.rs b/rosomaxa/src/population/greedy.rs index cf911eb83..a4f78b754 100644 --- a/rosomaxa/src/population/greedy.rs +++ b/rosomaxa/src/population/greedy.rs @@ -62,10 +62,17 @@ where Box::new(self.best_known.iter()) } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { Box::new(self.best_known.iter()) } + fn into_iter(self) -> Box> + where + Self::Individual: 'static, + { + Box::new(self.best_known.into_iter()) + } + fn size(&self) -> usize { usize::from(self.best_known.is_some()) } diff --git a/rosomaxa/src/population/mod.rs b/rosomaxa/src/population/mod.rs index b9183b986..31936cb5c 100644 --- a/rosomaxa/src/population/mod.rs +++ b/rosomaxa/src/population/mod.rs @@ -52,8 +52,13 @@ pub trait HeuristicPopulation: Send + Sync { /// Returns subset of individuals within their rank sorted according their quality. fn ranked(&self) -> Box + '_>; - /// Returns all individuals in arbitrary order. - fn all(&self) -> Box + '_>; + /// Returns iterator over all individuals in arbitrary order. + fn iter(&self) -> Box + '_>; + + /// Consumes population and returns an iterator over all individuals in arbitrary order. + fn into_iter(self) -> Box> + where + Self::Individual: 'static; /// Returns population size. 
fn size(&self) -> usize; diff --git a/rosomaxa/src/population/rosomaxa.rs b/rosomaxa/src/population/rosomaxa.rs index 245af97e2..1a7a8a30a 100644 --- a/rosomaxa/src/population/rosomaxa.rs +++ b/rosomaxa/src/population/rosomaxa.rs @@ -89,9 +89,9 @@ where impl HeuristicPopulation for Rosomaxa where - C: RosomaxaContext, - O: HeuristicObjective + Alternative, - S: RosomaxaSolution, + C: RosomaxaContext + 'static, + O: HeuristicObjective + Alternative + 'static, + S: RosomaxaSolution + 'static, { type Objective = O; type Individual = S; @@ -177,12 +177,23 @@ where self.elite.ranked() } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { match &self.phase { RosomaxaPhases::Exploration { network, .. } => { - Box::new(self.elite.all().chain(network.get_nodes().flat_map(|node| node.storage.population.all()))) + Box::new(self.elite.iter().chain(network.iter_nodes().flat_map(|node| node.storage.population.iter()))) } - _ => self.elite.all(), + _ => self.elite.iter(), + } + } + + fn into_iter(self) -> Box> { + match self.phase { + RosomaxaPhases::Exploration { network, .. } => Box::new( + self.elite + .into_iter() + .chain(network.into_iter().flat_map(|(_, node)| node.storage.population.into_iter())), + ), + _ => self.elite.into_iter(), } } diff --git a/rosomaxa/tests/unit/algorithms/gsom/network_test.rs b/rosomaxa/tests/unit/algorithms/gsom/network_test.rs index 6d99200db..f1b64ca9a 100644 --- a/rosomaxa/tests/unit/algorithms/gsom/network_test.rs +++ b/rosomaxa/tests/unit/algorithms/gsom/network_test.rs @@ -153,7 +153,7 @@ fn can_create_network() { assert_eq!(count_data_stored(&network.nodes), initial_data.len()); // Check node properties - for node in network.get_nodes() { + for node in network.iter_nodes() { assert_eq!(node.weights.len(), 3); assert!(node.error >= 0.); } @@ -294,7 +294,7 @@ fn can_create_network_with_spiral_distribution() { assert!(non_empty_nodes >= (size / 10), "too sparse {non_empty_nodes}"); assert!(network.size() <= (size * 2), "too big {}", network.size()); let distances: Vec<_> = network - .get_nodes() + .iter_nodes() .flat_map(|node| node.storage.data.iter().map(|data| euclidian_distance(&node.weights, data.weights()))) .collect(); let avg_distance = distances.iter().sum::() / distances.len() as Float; @@ -466,7 +466,7 @@ fn can_create_new_network_empty_regions() { let network = NetworkType::new(&(), initial_data, config, random, |_| DataStorageFactory).unwrap(); - let nodes_vec: Vec<_> = network.get_nodes().collect(); + let nodes_vec: Vec<_> = network.iter_nodes().collect(); let total_pairs = (nodes_vec.len() * (nodes_vec.len() - 1)) / 2; let failed_pairs = (0..nodes_vec.len()) .flat_map(|i| { diff --git a/rosomaxa/tests/unit/population/greedy_test.rs b/rosomaxa/tests/unit/population/greedy_test.rs index 0053849c0..aa90b17e6 100644 --- a/rosomaxa/tests/unit/population/greedy_test.rs +++ b/rosomaxa/tests/unit/population/greedy_test.rs @@ -51,7 +51,7 @@ fn can_select_when_empty() { let population = Greedy::<_, _>::new(objective, 1, None); assert_eq!(population.select().count(), 0); - assert_eq!(population.all().count(), 0); + assert_eq!(population.iter().count(), 0); } #[test] From ba470dc8d747a65ca32f7bbd8ea81f3c7b2af12c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 23:16:53 +0100 Subject: [PATCH 23/30] Add into iterators for getting individuals from context --- .../heuristic-research/src/solver/proxies.rs | 4 ++-- rosomaxa/src/algorithms/gsom/network.rs | 2 +- rosomaxa/src/lib.rs | 7 ++++++- rosomaxa/src/population/elitism.rs | 2 +- 
rosomaxa/src/population/greedy.rs | 2 +- rosomaxa/src/population/mod.rs | 2 +- rosomaxa/src/population/rosomaxa.rs | 14 +++++++------- vrp-core/src/solver/mod.rs | 5 +++++ 8 files changed, 24 insertions(+), 14 deletions(-) diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index 9601b958a..ec1b56e3b 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -191,8 +191,8 @@ where self.inner.iter() } - fn into_iter(self) -> Box> { - self.inner.into_iter() + fn into_iter(self: Box) -> Box> { + Box::new(self.inner).into_iter() } fn size(&self) -> usize { diff --git a/rosomaxa/src/algorithms/gsom/network.rs b/rosomaxa/src/algorithms/gsom/network.rs index 11bb4ce7d..9d43e9f34 100644 --- a/rosomaxa/src/algorithms/gsom/network.rs +++ b/rosomaxa/src/algorithms/gsom/network.rs @@ -220,7 +220,7 @@ where } /// Iterates over coordinates and their nodes. - pub fn into_iter(self) -> impl Iterator)> { + pub fn into_iter_nodes(self) -> impl Iterator)> { self.nodes.into_iter() } diff --git a/rosomaxa/src/lib.rs b/rosomaxa/src/lib.rs index 07cb400c9..859296933 100644 --- a/rosomaxa/src/lib.rs +++ b/rosomaxa/src/lib.rs @@ -163,7 +163,7 @@ where impl TelemetryHeuristicContext where O: HeuristicObjective, - S: HeuristicSolution, + S: HeuristicSolution + 'static, { /// Creates a new instance of `TelemetryHeuristicContext`. pub fn new( @@ -176,6 +176,11 @@ where Self { objective, population, telemetry, environment } } + /// Consumes context and returns all individuals. + pub fn into_individuals(self) -> Box> { + self.population.into_iter() + } + /// Adds solution to population. pub fn add_solution(&mut self, solution: S) { self.population.add(solution); diff --git a/rosomaxa/src/population/elitism.rs b/rosomaxa/src/population/elitism.rs index f94de862c..7b8b8c116 100644 --- a/rosomaxa/src/population/elitism.rs +++ b/rosomaxa/src/population/elitism.rs @@ -96,7 +96,7 @@ where Box::new(self.individuals.iter()) } - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static, { diff --git a/rosomaxa/src/population/greedy.rs b/rosomaxa/src/population/greedy.rs index a4f78b754..30e899d7d 100644 --- a/rosomaxa/src/population/greedy.rs +++ b/rosomaxa/src/population/greedy.rs @@ -66,7 +66,7 @@ where Box::new(self.best_known.iter()) } - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static, { diff --git a/rosomaxa/src/population/mod.rs b/rosomaxa/src/population/mod.rs index 31936cb5c..2b82a1627 100644 --- a/rosomaxa/src/population/mod.rs +++ b/rosomaxa/src/population/mod.rs @@ -56,7 +56,7 @@ pub trait HeuristicPopulation: Send + Sync { fn iter(&self) -> Box + '_>; /// Consumes population and returns an iterator over all individuals in arbitrary order. - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static; diff --git a/rosomaxa/src/population/rosomaxa.rs b/rosomaxa/src/population/rosomaxa.rs index 1a7a8a30a..102eccd8a 100644 --- a/rosomaxa/src/population/rosomaxa.rs +++ b/rosomaxa/src/population/rosomaxa.rs @@ -186,14 +186,14 @@ where } } - fn into_iter(self) -> Box> { + fn into_iter(self: Box) -> Box> { match self.phase { - RosomaxaPhases::Exploration { network, .. } => Box::new( - self.elite - .into_iter() - .chain(network.into_iter().flat_map(|(_, node)| node.storage.population.into_iter())), - ), - _ => self.elite.into_iter(), + RosomaxaPhases::Exploration { network, .. 
} => { + Box::new(Box::new(self.elite).into_iter().chain( + network.into_iter_nodes().flat_map(|(_, node)| Box::new(node.storage.population).into_iter()), + )) + } + _ => Box::new(self.elite).into_iter(), } } diff --git a/vrp-core/src/solver/mod.rs b/vrp-core/src/solver/mod.rs index 7dd90c513..53a4e3796 100644 --- a/vrp-core/src/solver/mod.rs +++ b/vrp-core/src/solver/mod.rs @@ -124,6 +124,11 @@ impl RefinementContext { Self { problem, environment, inner_context, state: Default::default(), initial_footprint } } + /// Consumes context and returns all individuals. + pub fn into_individuals(self) -> Box> { + self.inner_context.into_individuals() + } + /// Adds solution to population. pub fn add_solution(&mut self, solution: InsertionContext) { self.inner_context.add_solution(solution); From 01a7516d7054ab8d15dbf0843053b8cd76838f60 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 23:49:19 +0100 Subject: [PATCH 24/30] Modify decompose search to diversify solution if no improvement --- .../src/solver/search/decompose_search.rs | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 7aa6e68e0..70b100177 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -90,11 +90,25 @@ impl DecomposeSearch { (refinement_ctx, route_indices) }); - // merge evolution results into one insertion_ctx - let mut insertion_ctx = decomposed.into_iter().fold( - InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), - |insertion_ctx, decomposed| merge_best(decomposed, original_insertion_ctx, insertion_ctx), - ); + // get new and old parts and detect if there was any improvement in any part + let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = + decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed, original_insertion_ctx)).unzip(); + + let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); + + let mut insertion_ctx = if has_improvements { + improvements.into_iter().zip(new_parts.into_iter().zip(old_parts.into_iter())).fold( + InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), + |accumulated, (is_improvement, (new_part, old_part))| { + merge_parts(if is_improvement { new_part } else { old_part }, accumulated) + }, + ) + } else { + new_parts.into_iter().fold( + InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), + |accumulated, new_part| merge_parts(new_part, accumulated), + ) + }; insertion_ctx.restore(); finalize_insertion_ctx(&mut insertion_ctx); @@ -248,24 +262,23 @@ fn decompose_insertion_context( .and_then(|contexts| if contexts.len() > 1 { Some(contexts) } else { None }) } -fn merge_best( +fn get_solution_parts( decomposed: (RefinementContext, HashSet), original_insertion_ctx: &InsertionContext, - accumulated: InsertionContext, -) -> InsertionContext { +) -> ((SolutionContext, SolutionContext), bool) { let (decomposed_ctx, route_indices) = decomposed; - let decomposed_insertion_ctx = decomposed_ctx.ranked().next().expect(GREEDY_ERROR); + let decomposed_insertion_ctx = decomposed_ctx.into_individuals().next().expect(GREEDY_ERROR); let environment = original_insertion_ctx.environment.clone(); let (partial_insertion_ctx, _) = create_partial_insertion_ctx(original_insertion_ctx, environment, route_indices); let 
goal = partial_insertion_ctx.problem.goal.as_ref(); - let source_solution = if goal.total_order(decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less { - &decomposed_insertion_ctx.solution - } else { - &partial_insertion_ctx.solution - }; + let is_improvement = goal.total_order(&decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less; + + ((decomposed_insertion_ctx.solution, partial_insertion_ctx.solution), is_improvement) +} +fn merge_parts(source_solution: SolutionContext, accumulated: InsertionContext) -> InsertionContext { let mut accumulated = accumulated; let dest_solution = &mut accumulated.solution; From 4ba697b154f4065817f1f680a59b6bd3d79bb468 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 00:15:41 +0100 Subject: [PATCH 25/30] Fix clippy warning --- vrp-core/src/solver/search/decompose_search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 70b100177..a16b29633 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -97,7 +97,7 @@ impl DecomposeSearch { let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); let mut insertion_ctx = if has_improvements { - improvements.into_iter().zip(new_parts.into_iter().zip(old_parts.into_iter())).fold( + improvements.into_iter().zip(new_parts.into_iter().zip(old_parts)).fold( InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), |accumulated, (is_improvement, (new_part, old_part))| { merge_parts(if is_improvement { new_part } else { old_part }, accumulated) From baec260189840464b04ea452fac8e69bb76c0123 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 19:59:44 +0100 Subject: [PATCH 26/30] Add some improvements on heuristics --- rosomaxa/src/hyper/dynamic_selective.rs | 47 ++-- rosomaxa/src/utils/random.rs | 46 ++++ vrp-cli/src/extensions/solve/config.rs | 4 +- vrp-core/src/solver/heuristic.rs | 77 ++++--- .../src/solver/search/decompose_search.rs | 213 ++++++++++++++---- .../solver/search/local/exchange_swap_star.rs | 12 +- .../solver/search/decompose_search_test.rs | 2 +- .../search/local/exchange_swap_star_test.rs | 4 +- 8 files changed, 289 insertions(+), 116 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index e821555d9..0ebe11296 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -6,7 +6,7 @@ use super::*; use crate::Timer; use crate::algorithms::math::RemedianUsize; use crate::algorithms::rl::{SlotAction, SlotFeedback, SlotMachine}; -use crate::utils::{DefaultDistributionSampler, random_argmax}; +use crate::utils::{DefaultDistributionSampler, select_softmax_index}; use std::cmp::Ordering; use std::collections::HashMap; use std::fmt::Formatter; @@ -123,26 +123,31 @@ impl SearchRewards { /// Multiplier applied when a solution improves the Global Best Known. /// This is the "Jackpot" factor. - pub const GLOBAL_BEST_MULTIPLIER: Float = 2.5; + pub const GLOBAL_BEST_MULTIPLIER: Float = 2.0; /// Multiplier applied when a solution is diverse but not a global improvement. /// Keeps "Diverse" operators alive but with low signal. - pub const DIVERSE_MULTIPLIER: Float = 0.05; + pub const DIVERSE_MULTIPLIER: Float = 0.01; /// The maximum percentage (+/-) that execution duration can affect the reward. 
/// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. pub const PERF_TOLERANCE: Float = 0.2; + /// Softmax/Boltzmann policy temperature range (over Thompson samples). + /// Higher temperature => more exploration. + pub const SOFTMAX_TEMPERATURE_MIN: Float = 0.3; + pub const SOFTMAX_TEMPERATURE_MAX: Float = 2.0; + /// The "Cheap Failure" /// The penalty applied when an operator produces no improvement. /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. - pub const PENALTY_MIN: Float = -0.01; + pub const PENALTY_MIN: Float = -0.5; /// The "Expensive Failure" /// The penalty applied when an operator produces a negative outcome. /// A larger negative value ensures that failures are strongly discouraged. - pub const PENALTY_MAX: Float = -0.1; + pub const PENALTY_MAX: Float = -2.0; /// Calculates the ratio between the "Jackpot" and the "Normal" signal. /// Used by the Agent to determine how many times larger than the average @@ -157,6 +162,14 @@ impl SearchRewards { max_theoretical / typical_reward } + + /// Derives a temperature based on the search progress. + /// When the search plateaus, we increase exploration. + pub fn softmax_temperature(improvement_1000_ratio: Float) -> Float { + // In other places we treat 10% improvement as "fast flow". + let flow = (improvement_1000_ratio * 10.0).clamp(0.0, 1.0); + Self::SOFTMAX_TEMPERATURE_MIN + (1.0 - flow) * (Self::SOFTMAX_TEMPERATURE_MAX - Self::SOFTMAX_TEMPERATURE_MIN) + } } /// Search state for Thompson sampling. @@ -334,14 +347,11 @@ where }; // Get contextually appropriate slot machines. - let (slot_idx, slot_machine) = self - .slot_machines - .get(&from) - .and_then(|slots| { - random_argmax(slots.iter().map(|(slot, _)| slot.sample()), self.random.as_ref()) - .and_then(|slot_idx| slots.get(slot_idx).map(|(slot, _)| (slot_idx, slot))) - }) - .expect("cannot get slot machine"); + let slots = self.slot_machines.get(&from).expect("cannot get slot machines"); + let temperature = SearchRewards::softmax_temperature(heuristic_ctx.statistics().improvement_1000_ratio); + let samples = slots.iter().map(|(slot, _)| slot.sample()).collect::>(); + let slot_idx = select_softmax_index(&samples, temperature, self.random.as_ref()); + let slot_machine = &slots[slot_idx].0; let approx_median = self.tracker.approx_median(); @@ -430,15 +440,16 @@ where let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // Reward components (max ~4.0 for local, ~10.0 for global). 
let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; - let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) - * SearchRewards::BASE_REWARD - * SearchRewards::GLOBAL_BEST_MULTIPLIER; match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { // Global Jackpot - (Ordering::Greater, Ordering::Greater) => Some(reward_initial + reward_best), + (Ordering::Greater, Ordering::Greater) => { + let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) + * SearchRewards::BASE_REWARD + * SearchRewards::GLOBAL_BEST_MULTIPLIER; + Some(reward_initial + reward_best) + } // Local/Diverse Improvement (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), diff --git a/rosomaxa/src/utils/random.rs b/rosomaxa/src/utils/random.rs index 4bbeb1909..bb51da416 100644 --- a/rosomaxa/src/utils/random.rs +++ b/rosomaxa/src/utils/random.rs @@ -219,3 +219,49 @@ where }) .map(|(idx, _)| idx) } + +/// Selects an index from samples using softmax (Boltzmann) distribution with given temperature. +pub fn select_softmax_index(samples: &[Float], temperature: Float, random: &dyn Random) -> usize { + assert!(!samples.is_empty()); + + // If we ever see +inf, treat it as a hard-max tie among +inf arms. + if samples.iter().any(|&s| s.is_infinite() && s.is_sign_positive()) { + let idxs = samples + .iter() + .enumerate() + .filter(|(_, s)| s.is_infinite() && s.is_sign_positive()) + .map(|(idx, _)| idx) + .collect::>(); + let pick = random.uniform_int(0, (idxs.len() - 1) as i32) as usize; + return idxs[pick]; + } + + let temperature = temperature.max(f64::EPSILON); + let max_sample = samples.iter().copied().filter(|s| s.is_finite()).max_by(|a, b| a.total_cmp(b)).unwrap_or(0.0); + + // Stable softmax: w_i = exp((s_i - max_s) / T). Since (s_i - max_s) <= 0, + // weights are in (0, 1] and never overflow. + let mut weights = Vec::with_capacity(samples.len()); + let mut sum = 0.0; + for &s in samples { + let w = if s.is_finite() { ((s - max_sample) / temperature).exp() } else { 0.0 }; + sum += w; + weights.push(w); + } + + if sum <= f64::EPSILON { + // Degenerate case: fall back to argmax tie-break. + return random_argmax(samples.iter().copied(), random).unwrap_or(0); + } + + let mut r = random.uniform_real(0.0, sum); + for (idx, w) in weights.iter().enumerate() { + r -= *w; + if r <= 0.0 { + return idx; + } + } + + // Numerical edge: return last. 
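+    // (reachable only when floating-point rounding leaves `r` marginally above zero after the loop)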
+ samples.len() - 1 +} diff --git a/vrp-cli/src/extensions/solve/config.rs b/vrp-cli/src/extensions/solve/config.rs index d31289390..133597b14 100644 --- a/vrp-cli/src/extensions/solve/config.rs +++ b/vrp-cli/src/extensions/solve/config.rs @@ -625,7 +625,7 @@ fn create_operator( let operator = create_default_heuristic_operator(problem, environment.clone()); ( - Arc::new(DecomposeSearch::new(operator, (routes.min, routes.max), *repeat, 200)), + Arc::new(DecomposeSearch::new(operator, (routes.min, routes.max), *repeat)), create_operator_probability(probability, environment.random.clone()), ) } @@ -699,7 +699,7 @@ fn create_local_search( let operators = inners .iter() .map::<(Arc, usize), _>(|op| match op { - LocalOperatorType::SwapStar { weight } => (Arc::new(ExchangeSwapStar::new(random.clone(), 200)), *weight), + LocalOperatorType::SwapStar { weight } => (Arc::new(ExchangeSwapStar::new(random.clone())), *weight), LocalOperatorType::InterRouteBest { weight, noise } => { (Arc::new(ExchangeInterRouteBest::new(noise.probability, noise.min, noise.max)), *weight) } diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index 426286b47..a0d59e807 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -134,7 +134,7 @@ pub fn get_static_heuristic( let heuristic_group: TargetHeuristicGroup = vec![ ( - Arc::new(DecomposeSearch::new(default_operator.clone(), (2, 4), 4, SINGLE_HEURISTIC_QUOTA_LIMIT)), + Arc::new(DecomposeSearch::new(default_operator.clone(), (2, 4), 4)), create_context_operator_probability( 300, 10, @@ -279,8 +279,6 @@ fn get_limits(problem: &Problem) -> (RemovalLimits, RemovalLimits) { (normal_limits, small_limits) } -const SINGLE_HEURISTIC_QUOTA_LIMIT: usize = 200; - pub use self::builder::create_default_init_operators; pub use self::builder::create_default_processing; pub use self::statik::create_default_heuristic_operator; @@ -475,7 +473,7 @@ mod statik { pub fn create_default_local_search(random: Arc) -> TargetSearchOperator { Arc::new(LocalSearch::new(Arc::new(CompositeLocalOperator::new( vec![ - (Arc::new(ExchangeSwapStar::new(random, SINGLE_HEURISTIC_QUOTA_LIMIT)), 200), + (Arc::new(ExchangeSwapStar::new(random)), 200), (Arc::new(ExchangeInterRouteBest::default()), 100), (Arc::new(ExchangeSequence::default()), 100), (Arc::new(ExchangeInterRouteRandom::default()), 30), @@ -494,8 +492,8 @@ mod dynamic { fn get_weighted_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ - (cheapest.clone(), "cheapest".to_string(), 1.), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 3.), + (cheapest.clone(), "cheapest".to_string(), 2.), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 2.), (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string(), 1.), (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string(), 1.), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), @@ -537,19 +535,19 @@ mod dynamic { ( create_weighted(|limits| Arc::new(AdjustedStringRemoval::new_with_defaults(limits))), "asr".to_string(), - 7., + 2., ), - (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 5.), - (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 5.), - (create_weighted(|limits| 
Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 4.), - (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 4.), - (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 4.), - (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 2.), - (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 4.), + (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 1.), + (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 1.), + (create_weighted(|limits| Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 1.), + (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 1.), + (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 1.), + (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 1.), + (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 1.), ] } - fn get_mutations( + fn get_search_operators( problem: Arc, environment: Arc, ) -> Vec<(TargetSearchOperator, String, Float)> { @@ -574,22 +572,14 @@ mod dynamic { "local_reschedule_departure".to_string(), 1., ), - (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 3.), + (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 1.), ( - Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new( - environment.random.clone(), - SINGLE_HEURISTIC_QUOTA_LIMIT, - )))), + Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new(environment.random.clone())))), "local_swap_star".to_string(), - 5., - ), - // decompose search methods with different inner heuristic - ( - create_variable_search_decompose_search(problem.clone(), environment.clone()), - "decompose_search_var".to_string(), - 10., + 2., ), - (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 10.), + // combined decompose search with different inner heuristics + (create_combined_decompose_search(problem, environment), "decompose_search".to_string(), 2.), ] } @@ -621,22 +611,30 @@ mod dynamic { ( Arc::new(RuinAndRecreate::new(ruin.clone(), recreate.clone())), format!("{ruin_name}+{recreate_name}"), - ruin_weight * recreate_weight, + ruin_weight + recreate_weight, ) }) }) .collect::>(); - let mutations = get_mutations(problem.clone(), environment.clone()); + let operators = get_search_operators(problem.clone(), environment.clone()); let heuristic_filter = problem.extras.get_heuristic_filter(); ruin_recreate_ops .into_iter() - .chain(mutations) + .chain(operators) .filter(|(_, name, _)| heuristic_filter.as_ref().is_none_or(|filter| (filter)(name.as_str()))) .collect::>() } + /// Creates a default operator which is good for general use. + pub fn create_default_good_operator(problem: Arc, environment: Arc) -> TargetSearchOperator { + Arc::new(RuinAndRecreate::new( + Arc::new(AdjustedStringRemoval::new_with_defaults(get_limits(problem.as_ref()).0)), + Arc::new(RecreateWithBlinks::new_with_defaults(environment.random.clone())), + )) + } + /// Creates a default ruin-and-recreate operator for internal use (e.g., decompose search, infeasible search). /// Uses weighted ruins and recreates from the main operator building logic. 
pub fn create_default_inner_ruin_recreate( @@ -663,7 +661,7 @@ mod dynamic { pub fn create_default_local_search(random: Arc) -> Arc { Arc::new(LocalSearch::new(Arc::new(CompositeLocalOperator::new( vec![ - (Arc::new(ExchangeSwapStar::new(random, SINGLE_HEURISTIC_QUOTA_LIMIT / 4)), 2), + (Arc::new(ExchangeSwapStar::new(random)), 2), (Arc::new(ExchangeInterRouteBest::default()), 1), (Arc::new(ExchangeInterRouteRandom::default()), 1), (Arc::new(ExchangeIntraRouteRandom::default()), 1), @@ -682,13 +680,13 @@ mod dynamic { Arc::new(WeightedHeuristicOperator::new( vec![ create_default_inner_ruin_recreate(problem.clone(), environment.clone()), + create_default_good_operator(problem, environment.clone()), create_default_local_search(environment.random.clone()), ], - vec![10, 1], + vec![9, 3, 1], )), (2, 4), 2, - SINGLE_HEURISTIC_QUOTA_LIMIT, )) } @@ -707,7 +705,16 @@ mod dynamic { ])), (2, 4), 2, - SINGLE_HEURISTIC_QUOTA_LIMIT, + )) + } + + fn create_combined_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { + Arc::new(WeightedHeuristicOperator::new( + vec![ + create_variable_search_decompose_search(problem.clone(), environment.clone()), + create_composite_decompose_search(problem, environment), + ], + vec![1, 1], )) } } diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index a16b29633..139e319d9 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -8,7 +8,6 @@ use crate::solver::search::create_environment_with_custom_quota; use crate::solver::*; use crate::utils::Either; use rosomaxa::utils::parallel_into_collect; -use std::cell::RefCell; use std::cmp::Ordering; use std::collections::HashSet; use std::iter::{empty, once}; @@ -19,21 +18,15 @@ pub struct DecomposeSearch { inner_search: TargetSearchOperator, max_routes_range: (i32, i32), repeat_count: usize, - quota_limit: usize, } impl DecomposeSearch { /// Create a new instance of `DecomposeSearch`. 
- pub fn new( - inner_search: TargetSearchOperator, - max_routes_range: (usize, usize), - repeat_count: usize, - quota_limit: usize, - ) -> Self { + pub fn new(inner_search: TargetSearchOperator, max_routes_range: (usize, usize), repeat_count: usize) -> Self { assert!(max_routes_range.0 > 1); let max_routes_range = (max_routes_range.0 as i32, max_routes_range.1 as i32); - Self { inner_search, max_routes_range, repeat_count, quota_limit } + Self { inner_search, max_routes_range, repeat_count } } } @@ -46,15 +39,9 @@ impl HeuristicSearchOperator for DecomposeSearch { let refinement_ctx = heuristic_ctx; let insertion_ctx = solution; - decompose_insertion_context( - refinement_ctx, - insertion_ctx, - self.max_routes_range, - self.repeat_count, - self.quota_limit, - ) - .map(|contexts| self.refine_decomposed(refinement_ctx, insertion_ctx, contexts)) - .unwrap_or_else(|| self.inner_search.search(heuristic_ctx, insertion_ctx)) + decompose_insertion_context(refinement_ctx, insertion_ctx, self.max_routes_range, self.repeat_count) + .map(|contexts| self.refine_decomposed(refinement_ctx, contexts)) + .unwrap_or_else(|| self.inner_search.search(heuristic_ctx, insertion_ctx)) } } @@ -64,21 +51,23 @@ impl DecomposeSearch { fn refine_decomposed( &self, refinement_ctx: &RefinementContext, - original_insertion_ctx: &InsertionContext, decomposed: Vec<(RefinementContext, HashSet)>, ) -> InsertionContext { - // NOTE: validate decomposition + // NOTE: validate decomposition in debug builds only + #[cfg(debug_assertions)] decomposed.iter().enumerate().for_each(|(outer_ix, (_, outer))| { decomposed.iter().enumerate().filter(|(inner_idx, _)| outer_ix != *inner_idx).for_each( |(_, (_, inner))| { - assert!(outer.intersection(inner).next().is_none()); + debug_assert!(outer.intersection(inner).next().is_none()); }, ); }); // do actual refinement independently for each decomposed context let decomposed = parallel_into_collect(decomposed, |(mut refinement_ctx, route_indices)| { - let _ = (0..self.repeat_count).try_for_each(|_| { + let actual_repeat_count = get_repeat_count(self.repeat_count, refinement_ctx.environment.random.as_ref()); + + let _ = (0..actual_repeat_count).try_for_each(|_| { let insertion_ctx = refinement_ctx.selected().next().expect(GREEDY_ERROR); let insertion_ctx = self.inner_search.search(&refinement_ctx, insertion_ctx); let is_quota_reached = @@ -92,7 +81,7 @@ impl DecomposeSearch { // get new and old parts and detect if there was any improvement in any part let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = - decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed, original_insertion_ctx)).unzip(); + decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip(); let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); @@ -118,7 +107,29 @@ impl DecomposeSearch { } fn create_population(insertion_ctx: InsertionContext) -> TargetPopulation { - Box::new(GreedyPopulation::new(insertion_ctx.problem.goal.clone(), 1, Some(insertion_ctx))) + // Keep baseline and (optionally) best/last candidate without reconstructing baseline later. + Box::new(DecomposePopulation::new(insertion_ctx.problem.goal.clone(), 1, insertion_ctx)) +} + +/// Selects a repeat count from 1 to max_repeat_count using exponential decay. +/// Uses stack-allocated arrays for common cases to avoid heap allocation. 
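+///
+/// For example, with `max_repeat_count = 4` the weights `[27, 9, 3, 1]` yield
+/// 1, 2, 3 or 4 repeats with probabilities 27/40, 9/40, 3/40 and 1/40 respectively.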
+fn get_repeat_count(max_repeat_count: usize, random: &dyn Random) -> usize {
+    if max_repeat_count == 1 {
+        return 1;
+    }
+
+    // create weights with exponential decay: [3^(n-1), 3^(n-2), ..., 3^1, 3^0]
+    let index = match max_repeat_count {
+        2 => random.weighted(&[3, 1]),
+        3 => random.weighted(&[9, 3, 1]),
+        4 => random.weighted(&[27, 9, 3, 1]),
+        _ => {
+            let weights: Vec<_> = (1..=max_repeat_count).map(|i| 3_usize.pow((max_repeat_count - i) as u32)).collect();
+            // NOTE: `weighted` returns a zero-based index; it is shifted to a count once below.
+            random.weighted(&weights)
+        }
+    };
+
+    index + 1
 }
 
 fn create_multiple_insertion_contexts(
@@ -135,21 +146,24 @@ fn create_multiple_insertion_contexts(
     let max = if insertion_ctx.solution.routes.len() < max as usize { (max / 2).max(min) } else { max };
 
     // identify route groups and create contexts from them
-    let used_indices = RefCell::new(HashSet::new());
+    let mut used_indices: HashSet<usize> = HashSet::new();
     let insertion_ctxs = route_groups
         .into_iter()
         .enumerate()
-        .filter(|(outer_idx, _)| !used_indices.borrow().contains(outer_idx))
-        .map(|(outer_idx, route_group)| {
+        .filter_map(|(outer_idx, route_group)| {
+            if used_indices.contains(&outer_idx) {
+                return None;
+            }
+
             let group_size = environment.random.uniform_int(min, max) as usize;
             let route_group = once(outer_idx)
-                .chain(route_group.into_iter().filter(|inner_idx| !used_indices.borrow().contains(inner_idx)))
+                .chain(route_group.into_iter().filter(|inner_idx| !used_indices.contains(inner_idx)))
                 .take(group_size)
                 .collect::<Vec<_>>();
 
-            used_indices.borrow_mut().extend(route_group.iter().cloned());
+            used_indices.extend(route_group.iter().copied());
 
-            create_partial_insertion_ctx(insertion_ctx, environment.clone(), route_group)
+            Some(create_partial_insertion_ctx(insertion_ctx, environment.clone(), route_group))
         })
         .chain(create_empty_insertion_ctxs(insertion_ctx, environment.clone()))
         .collect();
@@ -235,11 +249,10 @@ fn decompose_insertion_context(
     insertion_ctx: &InsertionContext,
     max_routes_range: (i32, i32),
     repeat: usize,
-    quota_limit: usize,
 ) -> Option<Vec<(RefinementContext, HashSet<usize>)>> {
     // NOTE make limit a bit higher than median
     let median = refinement_ctx.statistics().speed.get_median();
-    let limit = median.map(|median| (median * repeat).max(quota_limit));
+    let limit = median.map(|median| (((median.max(10) * repeat) as f64) * 1.5) as usize);
     let environment = create_environment_with_custom_quota(limit, refinement_ctx.environment.as_ref());
 
     create_multiple_insertion_contexts(insertion_ctx, environment.clone(), max_routes_range)
@@ -262,36 +275,136 @@ fn decompose_insertion_context(
 }
 
-fn get_solution_parts(
-    decomposed: (RefinementContext, HashSet<usize>),
-    original_insertion_ctx: &InsertionContext,
-) -> ((SolutionContext, SolutionContext), bool) {
-    let (decomposed_ctx, route_indices) = decomposed;
-    let decomposed_insertion_ctx = decomposed_ctx.into_individuals().next().expect(GREEDY_ERROR);
-    let environment = original_insertion_ctx.environment.clone();
+fn get_solution_parts(decomposed: (RefinementContext, HashSet<usize>)) -> ((SolutionContext, SolutionContext), bool) {
+    let (decomposed_ctx, _) = decomposed;
+    let mut individuals = decomposed_ctx.into_individuals();
 
-    let (partial_insertion_ctx, _) = create_partial_insertion_ctx(original_insertion_ctx, environment, route_indices);
+    // Baseline is preserved by `DecomposePopulation` and yielded first.
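+    // (see the `into_iter` contract of `DecomposePopulation` below)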
+    let baseline = individuals.next().expect(GREEDY_ERROR);
+    // The second individual is always present:
+    // - if there was an improvement: the best improved solution
+    // - otherwise: the last non-improving solution (used for diversity)
+    let candidate = individuals.next().expect(GREEDY_ERROR);
 
-    let is_improvement = goal.total_order(&decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less;
+    let goal = baseline.problem.goal.as_ref();
 
-    ((decomposed_insertion_ctx.solution, partial_insertion_ctx.solution), is_improvement)
+    // When there is no improvement, `candidate` is the last non-improving solution for diversity.
+    // When there is an improvement, `candidate` is the best improved solution.
+    let is_improvement = goal.total_order(&candidate, &baseline) == Ordering::Less;
+
+    ((candidate.solution, baseline.solution), is_improvement)
 }
 
 fn merge_parts(source_solution: SolutionContext, accumulated: InsertionContext) -> InsertionContext {
     let mut accumulated = accumulated;
     let dest_solution = &mut accumulated.solution;
 
-    // NOTE theoretically, we can avoid deep copy here, but this would require an extension in Population trait
-    dest_solution.routes.extend(source_solution.routes.iter().map(|route_ctx| route_ctx.deep_copy()));
-    dest_solution.ignored.extend(source_solution.ignored.iter().cloned());
-    dest_solution.required.extend(source_solution.required.iter().cloned());
-    dest_solution.locked.extend(source_solution.locked.iter().cloned());
-    dest_solution.unassigned.extend(source_solution.unassigned.iter().map(|(k, v)| (k.clone(), v.clone())));
-
+    // register routes in registry before moving them
     source_solution.routes.iter().for_each(|route_ctx| {
         assert!(dest_solution.registry.use_route(route_ctx), "attempt to use route more than once");
     });
 
+    dest_solution.routes.extend(source_solution.routes);
+    dest_solution.ignored.extend(source_solution.ignored);
+    dest_solution.required.extend(source_solution.required);
+    dest_solution.locked.extend(source_solution.locked);
+    dest_solution.unassigned.extend(source_solution.unassigned);
+
     accumulated
 }
+
+/// A small population implementation used only by `DecomposeSearch`.
+///
+/// It preserves the original (baseline) individual so it can be compared against and (optionally)
+/// reused later without reconstructing or deep-copying it from the original full solution.
+///
+/// Additionally, when there is no improvement, it keeps the last non-improving candidate, which
+/// can be used to build a more diverse combined solution.
+struct DecomposePopulation {
+    objective: Arc<GoalContext>,
+    selection_size: usize,
+
+    baseline: InsertionContext,
+    best: Option<InsertionContext>,
+    last_non_improving: Option<InsertionContext>,
+}
+
+impl DecomposePopulation {
+    fn new(objective: Arc<GoalContext>, selection_size: usize, baseline: InsertionContext) -> Self {
+        Self { objective, selection_size, baseline, best: None, last_non_improving: None }
+    }
+
+    fn best_ref(&self) -> &InsertionContext {
+        self.best.as_ref().unwrap_or(&self.baseline)
+    }
+}
+
+impl HeuristicPopulation for DecomposePopulation {
+    type Objective = GoalContext;
+    type Individual = InsertionContext;
+
+    fn add_all(&mut self, individuals: Vec<Self::Individual>) -> bool {
+        if individuals.is_empty() {
+            return false;
+        }
+
+        // NOTE: call `add` first so that `||` short-circuiting cannot skip remaining individuals.
+        individuals.into_iter().fold(false, |acc, individual| self.add(individual) || acc)
+    }
+
+    fn add(&mut self, individual: Self::Individual) -> bool {
+        // Greedy update: replace best only when a strictly better solution is found.
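+        // `total_order` follows `Ord` semantics where `Less` is better: `Greater` here means the
+        // current best sorts after the candidate, i.e. the candidate strictly improves on it.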
+        if self.objective.total_order(self.best_ref(), &individual) == Ordering::Greater {
+            self.best = Some(individual);
+            // Once an improvement is found, non-improving candidates are no longer needed.
+            self.last_non_improving = None;
+            true
+        } else {
+            // Keep the last non-improving candidate for diversity (used only when no improvement happens).
+            if self.best.is_none() {
+                self.last_non_improving = Some(individual);
+            }
+            false
+        }
+    }
+
+    fn on_generation(&mut self, _: &HeuristicStatistics) {}
+
+    fn cmp(&self, a: &Self::Individual, b: &Self::Individual) -> Ordering {
+        self.objective.total_order(a, b)
+    }
+
+    fn select(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        Box::new(std::iter::repeat_n(self.best_ref(), self.selection_size))
+    }
+
+    fn ranked(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        // Not used by `DecomposeSearch`, but provide a deterministic iteration order.
+        if let Some(best) = self.best.as_ref() {
+            Box::new(once(best).chain(once(&self.baseline)))
+        } else {
+            Box::new(once(&self.baseline))
+        }
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        self.ranked()
+    }
+
+    fn into_iter(self: Box<Self>) -> Box<dyn Iterator<Item = Self::Individual>>
+    where
+        Self::Individual: 'static,
+    {
+        // Contract used by `get_solution_parts`:
+        // - Always yield baseline first.
+        // - Then yield either best (if any) or last non-improving (if any).
+        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving).into_iter()))
+    }
+
+    fn size(&self) -> usize {
+        1 + usize::from(self.best.is_some() || self.last_non_improving.is_some())
+    }
+
+    fn selection_phase(&self) -> SelectionPhase {
+        SelectionPhase::Exploitation
+    }
+}
diff --git a/vrp-core/src/solver/search/local/exchange_swap_star.rs b/vrp-core/src/solver/search/local/exchange_swap_star.rs
index 683f7e681..aeca49d46 100644
--- a/vrp-core/src/solver/search/local/exchange_swap_star.rs
+++ b/vrp-core/src/solver/search/local/exchange_swap_star.rs
@@ -24,17 +24,12 @@ use std::iter::once;
 pub struct ExchangeSwapStar {
     leg_selection: LegSelection,
     result_selector: Box<dyn ResultSelector + Send + Sync>,
-    quota_limit: usize,
 }
 
 impl ExchangeSwapStar {
     /// Creates a new instance of `ExchangeSwapStar`.
- pub fn new(random: Arc, quota_limit: usize) -> Self { - Self { - leg_selection: LegSelection::Stochastic(random), - result_selector: Box::::default(), - quota_limit, - } + pub fn new(random: Arc) -> Self { + Self { leg_selection: LegSelection::Stochastic(random), result_selector: Box::::default() } } } @@ -50,7 +45,8 @@ impl LocalOperator for ExchangeSwapStar { let route_pairs = create_route_pairs(insertion_ctx, ROUTE_PAIRS_THRESHOLD); // modify environment to include median as an extra quota to prevent long runs - let limit = refinement_ctx.statistics().speed.get_median().map(|median| median.max(self.quota_limit)); + let limit = + refinement_ctx.statistics().speed.get_median().map(|median| ((median.max(10) as f64) * 1.5) as usize); let mut insertion_ctx = InsertionContext { environment: create_environment_with_custom_quota(limit, insertion_ctx.environment.as_ref()), ..insertion_ctx.deep_copy() diff --git a/vrp-core/tests/unit/solver/search/decompose_search_test.rs b/vrp-core/tests/unit/solver/search/decompose_search_test.rs index c1c351085..b9826f0aa 100644 --- a/vrp-core/tests/unit/solver/search/decompose_search_test.rs +++ b/vrp-core/tests/unit/solver/search/decompose_search_test.rs @@ -54,7 +54,7 @@ fn can_perform_search() { let refinement_ctx = RefinementContext::new(problem.clone(), population, TelemetryMode::None, environment.clone()); let insertion_ctx = InsertionContext::new_from_solution(problem.clone(), (solution, None), environment.clone()); let inner_search = create_default_heuristic_operator(problem, environment); - let decompose_search = DecomposeSearch::new(inner_search, (2, 2), 10, 1000); + let decompose_search = DecomposeSearch::new(inner_search, (2, 2), 10); let result = decompose_search.search(&refinement_ctx, &insertion_ctx); diff --git a/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs b/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs index 76c65a8b5..b950b805a 100644 --- a/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs +++ b/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs @@ -81,7 +81,7 @@ fn can_use_exchange_swap_star_impl(jobs_order: Vec>, expected: Vec>(); assert_eq!(vehicles, vec!["0", "1", "2"]); - let insertion_ctx = ExchangeSwapStar::new(environment.random.clone(), 10000) + let insertion_ctx = ExchangeSwapStar::new(environment.random.clone()) .explore(&create_default_refinement_ctx(insertion_ctx.problem.clone()), &insertion_ctx) .expect("cannot find new solution"); @@ -101,7 +101,7 @@ fn can_keep_locked_jobs_in_place() { ); rearrange_jobs_in_routes(&mut insertion_ctx, jobs_order.as_slice()); - let insertion_ctx = ExchangeSwapStar::new(environment.random.clone(), 1000) + let insertion_ctx = ExchangeSwapStar::new(environment.random.clone()) .explore(&create_default_refinement_ctx(insertion_ctx.problem.clone()), &insertion_ctx) .expect("cannot find new solution"); From 86c9cac4725643dc6a0da09f213350ee9d50c599 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 23:45:44 +0100 Subject: [PATCH 27/30] Improve dynamic heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 24 +- rosomaxa/src/hyper/dynamic_selective.rs | 405 +++++++----------- .../unit/hyper/dynamic_selective_test.rs | 74 +--- 3 files changed, 168 insertions(+), 335 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index 32f20aa96..55d9402bb 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ 
b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -51,9 +51,6 @@ where S: DistributionSampler + Clone, { /// Creates a new instance with Universal Priors. - /// - /// Since the `SearchAgent` externally normalizes rewards to standard scores (Z-scores), - /// we can use universal constants for the priors rather than problem-specific guesses. pub fn new(prior_mean: Float, action: A, sampler: S) -> Self { // Universal priors for a Standard Normal distribution N(0, 1): // Alpha = 2.0 implies a weak prior belief with mathematically defined variance. @@ -61,8 +58,8 @@ where let alpha = 2.0; let beta = 1.0; - // Prior mean is clamped to a reasonable Z-score range (-0 to 10) - let mu = prior_mean.clamp(0.01, 10.0); + // Prior mean is clamped to a reasonable reward range [0.1, 2.0]. + let mu = prior_mean.clamp(0.1, 2.0); // Variance expectation v = Beta / (Alpha - 1) = 1.0. // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, @@ -102,16 +99,15 @@ where pub fn update(&mut self, feedback: &A::Feedback) { let reward = feedback.reward(); - // --- 1. Memory Decay (Non-Stationarity) --- + // 1. Memory Decay (Non-Stationarity) - // A decay factor of 0.999 implies a "memory horizon" of ~1000 samples. - // This is crucial for VRP where improvements are rare (sparse signals). - // It provides enough patience to wait for "lottery ticket" wins while still - // allowing the agent to abandon operators that stop working in later phases. - const DECAY_FACTOR: Float = 0.999; + // A decay factor of 0.995 implies a "memory horizon" of ~200 samples. + // This faster forgetting prevents operator monopolies and allows the agent + // to adapt more quickly when search dynamics change between phases. + const DECAY_FACTOR: Float = 0.995; // Decay the sufficient statistics. - // CRITICAL: We clamp alpha to >= 2.0. The variance of the Inverse-Gamma + // We clamp alpha to >= 2.0. The variance of the Inverse-Gamma // distribution is defined as Beta / (Alpha - 1). If Alpha <= 1, variance is undefined. // Keeping Alpha >= 2.0 ensures numerical stability and prevents division by zero. self.alpha = (self.alpha * DECAY_FACTOR).max(2.0); @@ -121,7 +117,7 @@ where // We do not decay this value so we can track total lifetime usage. self.n += 1; - // --- 2. Bayesian Update (Normal-Gamma) --- + // 2. Bayesian Update (Normal-Gamma) // Standard update adds 0.5 to Alpha for each new observation n=1. self.alpha += 0.5; @@ -144,7 +140,7 @@ where // contribution to the variance relative to prior knowledge. self.beta += 0.5 * (reward - old_mu).powi(2) * effective_n / (effective_n + 1.0); - // --- 3. Variance Estimation --- + // 3. Variance Estimation // Calculate expected variance E[σ²] = Beta / (Alpha - 1). // Since we enforced Alpha >= 2.0 (decayed) + 0.5 (update) = 2.5, diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 0ebe11296..a408d85af 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -6,7 +6,7 @@ use super::*; use crate::Timer; use crate::algorithms::math::RemedianUsize; use crate::algorithms::rl::{SlotAction, SlotFeedback, SlotMachine}; -use crate::utils::{DefaultDistributionSampler, select_softmax_index}; +use crate::utils::{DefaultDistributionSampler, random_argmax}; use std::cmp::Ordering; use std::collections::HashMap; use std::fmt::Formatter; @@ -105,72 +105,28 @@ where /// Type alias for slot machines used in Thompson sampling. 
pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; -/// Centralized "Physics" for the Reward System. -/// -/// This struct defines the constants that control how rewards are calculated, -/// scaled, and clamped. Changing values here automatically propagates to -/// reward estimation and signal normalization logic. -struct SearchRewards; - -impl SearchRewards { - /// The base value for a standard improvement (Local Search success). - /// Used as the atomic unit for signal normalization. - pub const BASE_REWARD: Float = 2.0; - - /// The offset added to relative distance to ensure positive rewards. - /// Logic: `(distance + OFFSET)`. - pub const DISTANCE_OFFSET: Float = 1.0; - - /// Multiplier applied when a solution improves the Global Best Known. - /// This is the "Jackpot" factor. - pub const GLOBAL_BEST_MULTIPLIER: Float = 2.0; - - /// Multiplier applied when a solution is diverse but not a global improvement. - /// Keeps "Diverse" operators alive but with low signal. - pub const DIVERSE_MULTIPLIER: Float = 0.01; - - /// The maximum percentage (+/-) that execution duration can affect the reward. - /// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. - pub const PERF_TOLERANCE: Float = 0.2; - - /// Softmax/Boltzmann policy temperature range (over Thompson samples). - /// Higher temperature => more exploration. - pub const SOFTMAX_TEMPERATURE_MIN: Float = 0.3; - pub const SOFTMAX_TEMPERATURE_MAX: Float = 2.0; - - /// The "Cheap Failure" - /// The penalty applied when an operator produces no improvement. - /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". - /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. - pub const PENALTY_MIN: Float = -0.5; - - /// The "Expensive Failure" - /// The penalty applied when an operator produces a negative outcome. - /// A larger negative value ensures that failures are strongly discouraged. - pub const PENALTY_MAX: Float = -2.0; - - /// Calculates the ratio between the "Jackpot" and the "Normal" signal. - /// Used by the Agent to determine how many times larger than the average - /// a signal must be to be considered an "Outlier" that needs clamping. - pub fn signal_clamp_ratio(max_dist: Float) -> Float { - // Recalculate based on new constants - let base_term = max_dist + Self::DISTANCE_OFFSET; - let global_term = base_term * Self::GLOBAL_BEST_MULTIPLIER; - let max_theoretical = (base_term + global_term) * (1.0 + Self::PERF_TOLERANCE); - - let typical_reward = (1.0 + Self::DISTANCE_OFFSET) * Self::BASE_REWARD; - - max_theoretical / typical_reward - } +/// Base reward for finding a new global best solution. +/// This is the "jackpot" that operators compete for. +const JACKPOT_BASE: Float = 2.0; - /// Derives a temperature based on the search progress. - /// When the search plateaus, we increase exploration. - pub fn softmax_temperature(improvement_1000_ratio: Float) -> Float { - // In other places we treat 10% improvement as "fast flow". - let flow = (improvement_1000_ratio * 10.0).clamp(0.0, 1.0); - Self::SOFTMAX_TEMPERATURE_MIN + (1.0 - flow) * (Self::SOFTMAX_TEMPERATURE_MAX - Self::SOFTMAX_TEMPERATURE_MIN) - } -} +/// Maximum reward for diverse improvements (soft ceiling via tanh saturation). +/// Must be well below JACKPOT_BASE to ensure exploitation of best-finding operators. +/// At 1.0, jackpots are guaranteed to be at least 2x more valuable than any diverse improvement. 
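+/// (Even with the efficiency multiplier from `compute_reward`, which is capped at 1.2, a diverse
+/// improvement scores at most 1.2, still well below the minimum jackpot of 2.0.)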
+const DIVERSE_CAP: Float = 1.0; + +/// Minimum reward for any improvement (prevents near-zero rewards). +/// Set high enough to be a meaningful positive signal. +const MIN_REWARD: Float = 0.1; + +/// Penalty scale for failures (multiplied by time ratio). +/// A failure that takes median time costs -0.1; twice median costs -0.2. +const PENALTY_SCALE: Float = 0.1; + +/// Floor for rewards (maximum penalty). +const REWARD_MIN: Float = -1.0; + +/// Ceiling for rewards (maximum jackpot + efficiency bonus). +const REWARD_MAX: Float = 10.0; /// Search state for Thompson sampling. #[derive(PartialEq, Eq, Hash, Clone, Debug)] @@ -230,29 +186,9 @@ where let duration = duration.as_millis() as usize; - // 1. Analyze result. - let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - - // 2. Calculate final reward / penalty. - let reward = match distance_score { - // Success: Apply performance multiplier (bonus/penalty within +/- 20%). - Some(base_score) => { - let mult = estimate_reward_perf_multiplier(&context, duration); - base_score * mult - } - // Failure: Apply time-weighted penalty. - None => { - // Get median (defensive default to duration itself to avoid div/0). - let median = context.approx_median.unwrap_or(duration).max(1) as Float; - let ratio = duration as Float / median; - - // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". - let raw_penalty = SearchRewards::PENALTY_MIN * ratio; - - // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. - raw_penalty.clamp(SearchRewards::PENALTY_MAX, SearchRewards::PENALTY_MIN) - } - }; + // Compute reward using the simplified V2.1 formula. + let reward = + compute_reward(context.heuristic_ctx, context.solution, &new_solution, duration, context.approx_median); let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; @@ -279,11 +215,11 @@ where } struct SearchAgent<'a, C, O, S> { - // Separate learning contexts for different search phases + /// Separate learning contexts for different search phases (BestKnown vs Diverse). slot_machines: HashMap>, - // Shared scale invariance (universal physics) - signal_stats: SignalStats, + /// Tracks operator durations for median calculation. tracker: HeuristicTracker, + /// Random number generator for Thompson sampling selection. random: Arc, } @@ -294,24 +230,28 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Calculate the Mean Weight - // We want the "Average Operator" to have a prior mu = 1.0. - // This aligns with SignalStats which normalizes the average reward to 1.0. + // Normalize weights so the average operator has prior_mean ≈ 1.0. + // This aligns with typical success rewards (~1-3 range). let total_weight: Float = search_operators.iter().map(|(_, _, w)| *w).sum(); let count = search_operators.len() as Float; + let avg_weight = if count > 0.0 && total_weight > f64::EPSILON { total_weight / count } else { 1.0 }; - let avg_weight = if count > 0.0 { total_weight / count } else { 1.0 }; - - // Avoid division by zero if weights are weird - let target_mean = SearchRewards::BASE_REWARD; - let scale = if avg_weight > f64::EPSILON { target_mean / avg_weight } else { target_mean }; - - // Factory function to create identical slot configurations for each state + // Factory function to create slot configurations for each state. 
+ // Uses domain knowledge (initial weights) as priors - important because: + // 1. We have many operators (cold start problem) + // 2. Limited search time may not be enough to learn from scratch + // 3. Weights encode expert knowledge about operator effectiveness let create_slots = || { search_operators .iter() .map(|(operator, name, initial_weight)| { - let prior_mean = initial_weight * scale; + // Smooth mapping of weight ratio to prior mean range [0.1, 3.0]. + // Uses tanh for smooth compression without hard cutoffs. + // ratio=1 (average) → prior=1.0, higher ratios → up to 3.0, lower → down to 0.1 + let ratio = initial_weight / avg_weight; + let t = (ratio - 1.0).tanh(); // smooth compression to [-1, 1] + // Asymmetric scaling: [−1,0] → [0.1,1.0], [0,1] → [1.0,3.0] + let prior_mean = if t >= 0.0 { 1.0 + t * 2.0 } else { 1.0 + t * 0.9 }; ( SlotMachine::new( prior_mean, @@ -324,20 +264,19 @@ where .collect::>() }; - // Initialize separate states with identical priors but independent learning + // Initialize separate states with identical priors but independent learning. let slot_machines = once((SearchState::BestKnown, create_slots())) .chain(once((SearchState::Diverse, create_slots()))) .collect(); Self { slot_machines, - signal_stats: SignalStats::new(), tracker: HeuristicTracker::new(environment.is_experimental), random: environment.random.clone(), } } - /// Picks the relevant search operator based on learnings and runs the search. + /// Picks the relevant search operator using pure Thompson Sampling and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { // Determine search context - critical for operator selection. let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { @@ -348,9 +287,10 @@ where // Get contextually appropriate slot machines. let slots = self.slot_machines.get(&from).expect("cannot get slot machines"); - let temperature = SearchRewards::softmax_temperature(heuristic_ctx.statistics().improvement_1000_ratio); + + // Sample each arm, pick argmax with random tie-break. let samples = slots.iter().map(|(slot, _)| slot.sample()).collect::>(); - let slot_idx = select_softmax_index(&samples, temperature, self.random.as_ref()); + let slot_idx = random_argmax(samples.into_iter(), self.random.as_ref()).unwrap_or(0); let slot_machine = &slots[slot_idx].0; let approx_median = self.tracker.approx_median(); @@ -359,35 +299,13 @@ where slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } - /// Updates estimations based on search feedback with protected signal baseline. + /// Updates the slot machine with the raw reward (no normalization needed). pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { - let max_dist = feedback.solution.as_ref().map_or(1., |s| s.fitness().count() as Float); - let raw_reward = feedback.sample.reward; - let current_scale = self.signal_stats.scale(); - - // 1. Update ruler. - // We only update the "Scale of Success" based on positive outcomes. - // We do NOT shrink the ruler just because we failed. - if raw_reward > f64::EPSILON { - let clamp_limit = current_scale * SearchRewards::signal_clamp_ratio(max_dist); - self.signal_stats.update(raw_reward.min(clamp_limit)); - } - - // 2. Normalize. 
- let normalized_reward = - if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; - let normalized_feedback = SearchFeedback { - sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, - slot_idx: feedback.slot_idx, - solution: None, - }; - - // 3. Update contextual slot machine. let from = &feedback.sample.transition.0; let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); - slots[feedback.slot_idx].0.update(&normalized_feedback); + slots[feedback.slot_idx].0.update(feedback); - // 4. Observe telemetry. + // Track telemetry. self.tracker.observe_sample(generation, feedback.sample.clone()); } @@ -409,105 +327,106 @@ where } } -fn compare_to_best(heuristic_ctx: &C, solution: &S) -> Ordering +/// Computes the reward for an operator based on solution improvement. +/// +/// Key design principles: +/// 1. **Best-Known Anchoring**: Improvements far from best get diminished credit. +/// 2. **Stagnation-Aware Efficiency**: Slow operators tolerated during stagnation. +/// 3. **Bounded Output**: Rewards in [-1, 5] for stable Bayesian updates. +fn compute_reward( + heuristic_ctx: &C, + initial_solution: &S, + new_solution: &S, + duration: usize, + approx_median: Option, +) -> Float where C: HeuristicContext, O: HeuristicObjective, S: HeuristicSolution, { - heuristic_ctx - .ranked() - .next() - .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known)) - .unwrap_or(Ordering::Less) -} + let objective = heuristic_ctx.objective(); -/// Estimates new solution discovery reward based on distance metric. -/// Returns `Some(reward)` for improvement, or `None` for stagnation. -fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option -where - C: HeuristicContext, - O: HeuristicObjective, - S: HeuristicSolution, -{ - heuristic_ctx - .ranked() - .next() - .map(|best_known| { - let objective = heuristic_ctx.objective(); - - // Calculate normalized relative distances [0, 1]. - let distance_initial = get_relative_distance(objective, new_solution, initial_solution); - let distance_best = get_relative_distance(objective, new_solution, best_known); - - let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; - - match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { - // Global Jackpot - (Ordering::Greater, Ordering::Greater) => { - let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) - * SearchRewards::BASE_REWARD - * SearchRewards::GLOBAL_BEST_MULTIPLIER; - Some(reward_initial + reward_best) - } + // Get best known solution for anchoring. + let best_known = match heuristic_ctx.ranked().next() { + Some(best) => best, + None => return 0.0, // No population yet, neutral reward. + }; - // Local/Diverse Improvement - (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), + // Determine improvement types. + let is_new_best = objective.total_order(new_solution, best_known) == Ordering::Less; + let is_improvement = objective.total_order(new_solution, initial_solution) == Ordering::Less; + + // COMPUTE REWARD BASED ON IMPROVEMENT TYPE + let raw_reward = if is_new_best { + // JACKPOT: Found new global best! + // Apply magnitude scaling so small improvements become meaningful signals. + // Raw distance is often tiny (0.001 for 0.1% improvement). 
@@ -409,105 +327,106 @@ where
     }
 }

-fn compare_to_best<C, O, S>(heuristic_ctx: &C, solution: &S) -> Ordering
+/// Computes the reward for an operator based on solution improvement.
+///
+/// Key design principles:
+/// 1. **Best-Known Anchoring**: Improvements far from best get diminished credit.
+/// 2. **Stagnation-Aware Efficiency**: Slow operators tolerated during stagnation.
+/// 3. **Bounded Output**: Rewards in [-1, 5] for stable Bayesian updates.
+fn compute_reward<C, O, S>(
+    heuristic_ctx: &C,
+    initial_solution: &S,
+    new_solution: &S,
+    duration: usize,
+    approx_median: Option<usize>,
+) -> Float
 where
     C: HeuristicContext<Objective = O, Solution = S>,
     O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    heuristic_ctx
-        .ranked()
-        .next()
-        .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known))
-        .unwrap_or(Ordering::Less)
-}
+    let objective = heuristic_ctx.objective();

-/// Estimates new solution discovery reward based on distance metric.
-/// Returns `Some(reward)` for improvement, or `None` for stagnation.
-fn estimate_distance_reward<C, O, S>(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option<Float>
-where
-    C: HeuristicContext<Objective = O, Solution = S>,
-    O: HeuristicObjective<Solution = S>,
-    S: HeuristicSolution,
-{
-    heuristic_ctx
-        .ranked()
-        .next()
-        .map(|best_known| {
-            let objective = heuristic_ctx.objective();
-
-            // Calculate normalized relative distances [0, 1].
-            let distance_initial = get_relative_distance(objective, new_solution, initial_solution);
-            let distance_best = get_relative_distance(objective, new_solution, best_known);
-
-            let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD;
-
-            match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) {
-                // Global Jackpot
-                (Ordering::Greater, Ordering::Greater) => {
-                    let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET)
-                        * SearchRewards::BASE_REWARD
-                        * SearchRewards::GLOBAL_BEST_MULTIPLIER;
-                    Some(reward_initial + reward_best)
-                }
+    // Get the best known solution for anchoring.
+    let best_known = match heuristic_ctx.ranked().next() {
+        Some(best) => best,
+        None => return 0.0, // No population yet, neutral reward.
+    };

-                // Local/Diverse Improvement
-                (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER),
+    // Determine improvement types.
+    let is_new_best = objective.total_order(new_solution, best_known) == Ordering::Less;
+    let is_improvement = objective.total_order(new_solution, initial_solution) == Ordering::Less;
+
+    // Compute the reward based on the improvement type.
+    let raw_reward = if is_new_best {
+        // JACKPOT: Found a new global best!
+        // Apply magnitude scaling so small improvements become meaningful signals.
+        // Raw distance is often tiny (0.001 for a 0.1% improvement).
+        // ln_1p(x * 1000) transforms: 0.001 -> ~0.69, 0.01 -> ~2.4, 0.1 -> ~4.6
+        let improvement_distance = get_relative_distance(new_solution, initial_solution);
+        let magnitude = (improvement_distance * 1000.0).ln_1p();
+        JACKPOT_BASE + magnitude
+    } else if is_improvement {
+        // DIVERSE IMPROVEMENT: Better than the starting point, but not a global best.
+        // Use tanh for soft saturation - large improvements asymptote to DIVERSE_CAP,
+        // ensuring diverse rewards stay well below JACKPOT_BASE.
+        let improvement_distance = get_relative_distance(new_solution, initial_solution);
+        let magnitude = (improvement_distance * 1000.0).ln_1p();
+        let saturated = magnitude.tanh();
+
+        // Proximity to best: closer solutions get a higher reward.
+        let gap_to_best = get_relative_distance(new_solution, best_known);
+        let proximity_factor = (1.0 - gap_to_best).powi(2);
+
+        // Base utility: soft-capped and bounded to [MIN_REWARD, DIVERSE_CAP].
+        let base_utility = (DIVERSE_CAP * saturated * proximity_factor).clamp(MIN_REWARD, DIVERSE_CAP);
+
+        // Apply efficiency modulation: fast improvements get a bonus, slow ones a penalty.
+        let median = approx_median.unwrap_or(duration.max(1)).max(1) as Float;
+        let improvement_ratio = heuristic_ctx.statistics().improvement_1000_ratio;
+
+        // "Flow" measures search progress: 0 = stagnating, 1 = fast progress.
+        let flow = (improvement_ratio * 10.0).clamp(0.0, 1.0);
+
+        // Efficiency clamp range adapts to the search phase (reduced impact):
+        // - Stagnation (flow=0): [0.9, 1.1] - 10% penalty to 10% bonus
+        // - Fast progress (flow=1): [0.8, 1.2] - 20% penalty to 20% bonus
+        let min_eff = 0.9 - flow * 0.1;
+        let max_eff = 1.1 + flow * 0.1;
+
+        let efficiency = (median / duration as Float).clamp(min_eff, max_eff);
+
+        base_utility * efficiency
+    } else {
+        // FAILURE: time-proportional penalty.
+        let median = approx_median.unwrap_or(duration.max(1)).max(1) as Float;
+        let time_ratio = duration as Float / median;
+        -PENALTY_SCALE * time_ratio
+    };

-                // Stagnation
-                _ => None,
-            }
-        })
-        .unwrap_or(None)
+    // Clamp to the bounded range for stable Bayesian updates.
+    raw_reward.clamp(REWARD_MIN, REWARD_MAX)
 }
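
A small worked example makes the shape of these rewards visible. The values of JACKPOT_BASE and DIVERSE_CAP are assumed for illustration only (this patch pins just the overall [-1, 5] bounds); the formulas are the ones from compute_reward above:

    const JACKPOT_BASE: f64 = 2.0; // assumed
    const DIVERSE_CAP: f64 = 1.0;  // assumed
    const REWARD_MIN: f64 = -1.0;
    const REWARD_MAX: f64 = 5.0;

    fn main() {
        // New global best with a 0.1% relative improvement:
        let distance: f64 = 0.001;
        let jackpot = JACKPOT_BASE + (distance * 1000.0).ln_1p();
        println!("jackpot reward: {:.2}", jackpot.clamp(REWARD_MIN, REWARD_MAX)); // ~2.69

        // Diverse improvement of 1% that sits 5% away from the best known:
        let saturated = (0.01f64 * 1000.0).ln_1p().tanh(); // ~0.98
        let proximity = (1.0 - 0.05f64).powi(2);           // ~0.90
        let diverse = DIVERSE_CAP * saturated * proximity;
        println!("diverse reward: {:.2}", diverse.clamp(REWARD_MIN, REWARD_MAX)); // ~0.89
    }

Under these assumptions even a sizeable diverse improvement stays below the smallest jackpot, which is exactly the ordering the doc comment promises.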
-/// Estimates performance of the used operation based on its duration and the current search phase.
-/// Returns a reward multiplier in the range `[0.8, 1.2]`.
-fn estimate_reward_perf_multiplier<C, O, S>(search_ctx: &SearchContext<C, O, S>, duration: usize) -> Float
+fn compare_to_best<C, O, S>(heuristic_ctx: &C, solution: &S) -> Ordering
 where
     C: HeuristicContext<Objective = O, Solution = S>,
     O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio;
-    let approx_median = &search_ctx.approx_median;
-    let median = match approx_median {
-        Some(m) if *m > 0 => *m as Float,
-        _ => return 1.0,
-    };
-
-    let time_ratio = duration as Float / median;
-
-    // Calculate the raw time modifier (logarithmic).
-    let raw_modifier = (1.0 / time_ratio).ln() * 0.15;
-
-    // Apply phase-dependent damping.
-    // Smooth transition from 0.0 to 1.0 based on improvement ratio.
-    // We saturate at 0.1 (10% improvement is considered "Fast Flow").
-    let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0);
-    let final_modifier = raw_modifier * phase_damping;
-
-    // Final safety clamp defined by reward physics.
-    let tolerance = SearchRewards::PERF_TOLERANCE;
-    (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance)
+    heuristic_ctx
+        .ranked()
+        .next()
+        .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known))
+        .unwrap_or(Ordering::Less)
 }

-/// Returns the normalized distance in `[0.0, 1.0]` (absolute magnitude)
-/// where 1.0 = Improvement on Primary Objective.
-/// Returns a negative value if worse.
-fn get_relative_distance<O, S>(objective: &O, a: &S, b: &S) -> Float
+/// Returns the normalized distance in `[0.0, 1.0]`.
+fn get_relative_distance<S>(a: &S, b: &S) -> Float
 where
-    O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    let order = objective.total_order(a, b);
-
-    let sign = match order {
-        Ordering::Less => 1.,
-        Ordering::Greater => -1.,
-        Ordering::Equal => return 0.,
-    };
-
+    // Find the first differing fitness component.
     let idx = a
         .fitness()
         .zip(b.fitness())
@@ -515,69 +434,31 @@ where
         .find(|(_, (fitness_a, fitness_b))| fitness_a != fitness_b)
        .map(|(idx, _)| idx);

-    let idx = if let Some(idx) = idx {
-        idx
-    } else {
-        return 0.;
+    let idx = match idx {
+        Some(idx) => idx,
+        None => return 0., // All fitness values equal.
     };

     let total_objectives = a.fitness().count();
-    assert_ne!(total_objectives, 0, "cannot have an empty objective here");
-    assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives");
+    if total_objectives == 0 || total_objectives == idx {
+        return 0.;
+    }

-    // Normalization: Divide by total_objectives to map to [0, 1].
+    // Priority amplifier: earlier objectives matter more.
     let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float;

+    // Relative difference in the differing component.
     let value = a
         .fitness()
         .nth(idx)
         .zip(b.fitness().nth(idx))
-        .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()))
-        .expect("cannot get fitness by idx");
-
-    value * sign * priority_amplifier
-}
+        .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()).max(f64::EPSILON))
+        .unwrap_or(0.0);

-/// Signal tracker that observes ONLY positive values to establish a baseline for "Success".
-/// Uses exponential moving average for stability in sparse signals.
-#[derive(Clone)]
-struct SignalStats {
-    mean: Float,
-    n: Float,
-}
-
-impl SignalStats {
-    fn new() -> Self {
-        Self { mean: 0.0, n: 0.0 }
-    }
-
-    /// Observes ONLY positive values to establish a baseline for "Success".
-    fn update(&mut self, value: Float) {
-        if value <= f64::EPSILON {
-            return;
-        }
-
-        // Horizon: Adapt to the scale of the last ~200 successful operations.
-        // This is structural (adaptation speed), not problem-specific.
-        let window_size = 200.0;
-        let decay = 1.0 - (1.0 / window_size);
-
-        self.n = self.n * decay + 1.0;
-
-        // Exponential moving average of the magnitude.
-        // We use this instead of Welford's variance for stability in sparse signals.
-        let learning_rate = 1.0 / self.n;
-        self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate;
-    }
-
-    /// Returns the scale factor.
-    /// If we haven't seen enough data, return 1.0 to avoid division by zero.
-    fn scale(&self) -> Float {
-        if self.mean < f64::EPSILON { 1.0 } else { self.mean }
-    }
-}
+    value * priority_amplifier
+}

-/// Enhanced diagnostic tracker for Thompson sampling analysis.
+/// Diagnostic tracker for Thompson sampling analysis.
 struct HeuristicTracker {
     total_median: RemedianUsize,
     search_telemetry: Vec<(usize, SearchSample)>,
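
The priority amplifier is easiest to see on plain fitness vectors. A standalone restatement of the same arithmetic, with names assumed and the logic mirrored from the hunk above:

    // Normalized distance between two fitness vectors: relative gap in the
    // first differing component, weighted so earlier objectives count more.
    fn relative_distance(a: &[f64], b: &[f64]) -> f64 {
        let Some(idx) = a.iter().zip(b).position(|(x, y)| x != y) else {
            return 0.0; // all components equal
        };
        let amplifier = (a.len() - idx) as f64 / a.len() as f64;
        let (x, y) = (a[idx], b[idx]);
        (x - y).abs() / x.abs().max(y.abs()).max(f64::EPSILON) * amplifier
    }

    fn main() {
        // Primary objective differs: full weight (amplifier = 1.0).
        assert!((relative_distance(&[90.0, 10.0], &[100.0, 10.0]) - 0.1).abs() < 1e-9);
        // Only the secondary differs: same 10% gap, halved by the amplifier.
        assert!((relative_distance(&[10.0, 90.0], &[10.0, 100.0]) - 0.05).abs() < 1e-9);
    }

Note that the sign is gone on purpose: the new reward logic decides improvement vs. regression via `total_order`, so the distance only has to report magnitude.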
diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
index 4d0449243..43298f42d 100644
--- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
+++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
@@ -56,58 +56,24 @@ fn can_estimate_median() {
     assert!(median > 0);
 }

-parameterized_test! {can_estimate_reward_multiplier, (improvement_ratio, approx_median, duration, expected), {
-    can_estimate_reward_multiplier_impl(improvement_ratio, approx_median, duration, expected);
+parameterized_test! {can_compute_relative_distance, (fitness_a, fitness_b, expected), {
+    can_compute_relative_distance_impl(fitness_a, fitness_b, expected);
 }}

-can_estimate_reward_multiplier! {
-    case_01_fast_with_improvement: (0.1, Some(10), 5, 1.104), // Fast operator during improvement: ln(2.0)*0.15*1.0 ≈ 0.104
-    case_02_slow_with_improvement: (0.1, Some(10), 20, 0.896), // Slow operator during improvement: ln(0.5)*0.15*1.0 ≈ -0.104
-    case_03_fast_partial_improvement: (0.05, Some(10), 5, 1.052), // Fast with 50% damping: ln(2.0)*0.15*0.5 ≈ 0.052
-    case_04_no_improvement: (0.0, Some(10), 5, 1.0), // No improvement = no bonus
-    case_05_no_median: (0.1, None, 5, 1.0), // No median = baseline
-    case_06_clamped_fast: (0.1, Some(10), 1, 1.2), // Very fast but clamped to PERF_TOLERANCE (0.2)
-    case_07_clamped_slow: (0.1, Some(10), 100, 0.8), // Very slow but clamped to -PERF_TOLERANCE (-0.2)
+can_compute_relative_distance! {
+    case_01_improvement: (vec![90.0], vec![100.0], 0.1), // 10% distance: |100-90|/100 = 0.1
+    case_02_regression: (vec![110.0], vec![100.0], 0.09), // 9% distance: |110-100|/110 ≈ 0.09
+    case_03_equal: (vec![100.0], vec![100.0], 0.0), // Equal = no distance
+    case_04_primary_priority: (vec![90.0, 100.0], vec![100.0, 90.0], 0.1), // Primary objective distance
 }

-fn can_estimate_reward_multiplier_impl(
-    improvement_ratio: Float,
-    approx_median: Option<usize>,
-    duration: usize,
-    expected: Float,
-) {
-    // Create a mock context with the specified improvement ratio
-    let mut heuristic_ctx = create_default_heuristic_context();
-
-    // Simulate improvement ratio by triggering generations
-    if improvement_ratio > 0.0 {
-        let num_improvements = (improvement_ratio * 1000.0) as usize;
-
-        // Add improving solutions
-        for i in 0..num_improvements {
-            let solution = VectorSolution::new(vec![], -(i as Float), vec![]);
-            heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start());
-        }
-
-        // Add non-improving solutions
-        for _ in num_improvements..1000 {
-            let solution = VectorSolution::new(vec![], 100.0, vec![]);
-            heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start());
-        }
-    }
-
-    let solution = VectorSolution::new(vec![], 0., vec![]);
-    let search_ctx = SearchContext {
-        heuristic_ctx: &heuristic_ctx,
-        from: SearchState::BestKnown,
-        slot_idx: 0,
-        solution: &solution,
-        approx_median,
-    };
+fn can_compute_relative_distance_impl(fitness_a: Vec<Float>, fitness_b: Vec<Float>, expected: Float) {
+    let solution_a = VectorSolution::new(vec![], fitness_a.first().copied().unwrap_or(0.0), fitness_a);
+    let solution_b = VectorSolution::new(vec![], fitness_b.first().copied().unwrap_or(0.0), fitness_b);

-    let result = estimate_reward_perf_multiplier(&search_ctx, duration);
+    let result = get_relative_distance(&solution_a, &solution_b);

-    assert!((result - expected).abs() < 0.001, "Expected {expected}, got {result}");
"Expected ~{expected}, got {result}"); } #[test] @@ -132,18 +98,8 @@ fn can_display_heuristic_info() { } #[test] -fn can_handle_when_objective_lies() { - struct LiarObjective; - - impl HeuristicObjective for LiarObjective { - type Solution = TestData; - - fn total_order(&self, _: &Self::Solution, _: &Self::Solution) -> Ordering { - // that is where it lies based on some non-fitness related factors for total order - Ordering::Greater - } - } - +fn can_handle_equal_fitness_solutions() { + // Test that solutions with identical fitness return 0 distance. struct TestData; impl HeuristicSolution for TestData { @@ -157,7 +113,7 @@ fn can_handle_when_objective_lies() { } } - let distance = get_relative_distance(&LiarObjective, &TestData, &TestData); + let distance = get_relative_distance(&TestData, &TestData); assert_eq!(distance, 0.) } From 29f7884d0deaa0a9af289361098cc0dbbb5f03f8 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 15 Jan 2026 00:02:31 +0100 Subject: [PATCH 28/30] Split decompose search --- vrp-core/src/solver/heuristic.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index a0d59e807..5b05513fa 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -578,8 +578,12 @@ mod dynamic { "local_swap_star".to_string(), 2., ), - // combined decompose search with different inner heuristics - (create_combined_decompose_search(problem, environment), "decompose_search".to_string(), 2.), + ( + create_variable_search_decompose_search(problem.clone(), environment.clone()), + "variable_decompose_search".to_string(), + 2., + ), + (create_composite_decompose_search(problem, environment), "composite_decompose_search".to_string(), 2.), ] } @@ -707,16 +711,6 @@ mod dynamic { 2, )) } - - fn create_combined_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { - Arc::new(WeightedHeuristicOperator::new( - vec![ - create_variable_search_decompose_search(problem.clone(), environment.clone()), - create_composite_decompose_search(problem, environment), - ], - vec![1, 1], - )) - } } fn get_recreate_with_alternative_goal( From 49e1878bbca315fedd407b5675832fbc9ef27558 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 15 Jan 2026 00:03:50 +0100 Subject: [PATCH 29/30] Fix clippy warnings --- vrp-core/src/solver/search/decompose_search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 139e319d9..a2db2d9fd 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -81,7 +81,7 @@ impl DecomposeSearch { // get new and old parts and detect if there was any improvement in any part let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = - decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip(); + decomposed.into_iter().map(get_solution_parts).unzip(); let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); @@ -348,7 +348,7 @@ impl HeuristicPopulation for DecomposePopulation { return false; } - individuals.into_iter().fold(false, |acc, individual| acc || self.add(individual)) + individuals.into_iter().any(|individual| self.add(individual)) } fn add(&mut self, individual: Self::Individual) -> bool { @@ -397,7 +397,7 @@ impl HeuristicPopulation for DecomposePopulation { // Contract used by 
From 49e1878bbca315fedd407b5675832fbc9ef27558 Mon Sep 17 00:00:00 2001
From: reinterpretcat
Date: Thu, 15 Jan 2026 00:03:50 +0100
Subject: [PATCH 29/30] Fix clippy warnings

---
 vrp-core/src/solver/search/decompose_search.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs
index 139e319d9..a2db2d9fd 100644
--- a/vrp-core/src/solver/search/decompose_search.rs
+++ b/vrp-core/src/solver/search/decompose_search.rs
@@ -81,7 +81,7 @@ impl DecomposeSearch {

         // get new and old parts and detect if there was any improvement in any part
         let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) =
-            decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip();
+            decomposed.into_iter().map(get_solution_parts).unzip();

         let has_improvements = improvements.iter().any(|is_improvement| *is_improvement);

@@ -348,7 +348,7 @@ impl HeuristicPopulation for DecomposePopulation {
             return false;
         }

-        individuals.into_iter().fold(false, |acc, individual| acc || self.add(individual))
+        individuals.into_iter().any(|individual| self.add(individual))
     }

     fn add(&mut self, individual: Self::Individual) -> bool {
@@ -397,7 +397,7 @@ impl HeuristicPopulation for DecomposePopulation {
         // Contract used by `get_solution_parts`:
         // - Always yield baseline first.
         // - Then yield either best (if any) or last non-improving (if any).
-        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving).into_iter()))
+        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving)))
     }

     fn size(&self) -> usize {

From 6b50a837be15be6cab491a733819d210574a649a Mon Sep 17 00:00:00 2001
From: reinterpretcat
Date: Thu, 15 Jan 2026 00:25:35 +0100
Subject: [PATCH 30/30] Delete dead code

---
 rosomaxa/src/utils/random.rs | 46 ------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/rosomaxa/src/utils/random.rs b/rosomaxa/src/utils/random.rs
index bb51da416..4bbeb1909 100644
--- a/rosomaxa/src/utils/random.rs
+++ b/rosomaxa/src/utils/random.rs
@@ -219,49 +219,3 @@ where
         })
         .map(|(idx, _)| idx)
 }
-
-/// Selects an index from samples using softmax (Boltzmann) distribution with given temperature.
-pub fn select_softmax_index(samples: &[Float], temperature: Float, random: &dyn Random) -> usize {
-    assert!(!samples.is_empty());
-
-    // If we ever see +inf, treat it as a hard-max tie among +inf arms.
-    if samples.iter().any(|&s| s.is_infinite() && s.is_sign_positive()) {
-        let idxs = samples
-            .iter()
-            .enumerate()
-            .filter(|(_, s)| s.is_infinite() && s.is_sign_positive())
-            .map(|(idx, _)| idx)
-            .collect::<Vec<_>>();
-        let pick = random.uniform_int(0, (idxs.len() - 1) as i32) as usize;
-        return idxs[pick];
-    }
-
-    let temperature = temperature.max(f64::EPSILON);
-    let max_sample = samples.iter().copied().filter(|s| s.is_finite()).max_by(|a, b| a.total_cmp(b)).unwrap_or(0.0);
-
-    // Stable softmax: w_i = exp((s_i - max_s) / T). Since (s_i - max_s) <= 0,
-    // weights are in (0, 1] and never overflow.
-    let mut weights = Vec::with_capacity(samples.len());
-    let mut sum = 0.0;
-    for &s in samples {
-        let w = if s.is_finite() { ((s - max_sample) / temperature).exp() } else { 0.0 };
-        sum += w;
-        weights.push(w);
-    }
-
-    if sum <= f64::EPSILON {
-        // Degenerate case: fall back to argmax tie-break.
-        return random_argmax(samples.iter().copied(), random).unwrap_or(0);
-    }
-
-    let mut r = random.uniform_real(0.0, sum);
-    for (idx, w) in weights.iter().enumerate() {
-        r -= *w;
-        if r <= 0.0 {
-            return idx;
-        }
-    }
-
-    // Numerical edge: return last.
-    samples.len() - 1
-}
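
With the temperature-based selection deleted, the surviving policy is argmax over posterior samples with a uniform tie-break. A self-contained sketch of that behavior (the helper name is assumed, and a closure stands in for the crate's Random trait):

    // Returns the index of the maximum sample, breaking ties via `pick`;
    // returns None only for empty input, mirroring random_argmax's contract.
    fn argmax_random_tie(samples: &[f64], pick: &mut impl FnMut(usize) -> usize) -> Option<usize> {
        let max = samples.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let ties: Vec<usize> =
            samples.iter().enumerate().filter(|(_, &s)| s == max).map(|(idx, _)| idx).collect();
        if ties.is_empty() { None } else { Some(ties[pick(ties.len()) % ties.len()]) }
    }

    fn main() {
        // Deterministic stand-in "rng" that always picks the first candidate.
        let mut first = |_n: usize| 0;
        assert_eq!(argmax_random_tie(&[0.3, 0.9, 0.9], &mut first), Some(1)); // tie at 1 and 2
        assert_eq!(argmax_random_tie(&[], &mut first), None); // empty input, no panic
    }

Pure Thompson sampling already injects randomness through the posterior draws themselves, so the Boltzmann layer on top was redundant, which is why the softmax helper could go as dead code.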