From a10307087db73879d41911f463f735613d6ffa54 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Mon, 15 Dec 2025 21:06:01 +0100 Subject: [PATCH 01/30] Add FUNDING.md --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 000000000..4e35480b3 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: reinterpretcat \ No newline at end of file From ab5ae8adcfc1d99c7dd0f1413beb5e597af74772 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 10:44:35 +0100 Subject: [PATCH 02/30] Add test to reproduce crash --- .../tests/features/breaks/required_break.rs | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/vrp-pragmatic/tests/features/breaks/required_break.rs b/vrp-pragmatic/tests/features/breaks/required_break.rs index c6e31914f..d8e52d3a3 100644 --- a/vrp-pragmatic/tests/features/breaks/required_break.rs +++ b/vrp-pragmatic/tests/features/breaks/required_break.rs @@ -1,3 +1,4 @@ +use crate::format::Location; use crate::format::problem::*; use crate::format_time; use crate::helpers::*; @@ -198,7 +199,6 @@ fn can_skip_break_if_it_is_after_start_before_end_range() { assert!(get_ids_from_tour(&solution.tours[0]).iter().flatten().all(|id| id != "break")); } -// TODO check exact and offset use cases #[test] fn can_reschedule_break_early_from_transport_to_activity() { let is_open = true; @@ -252,3 +252,82 @@ fn can_reschedule_break_early_from_transport_to_activity() { .build() ); } + +#[test] +fn can_handle_required_break_with_infeasible_sequence_relation() { + let create_test_job = |index: usize, duration: f64, times: (String, String)| Job { + services: Some(vec![JobTask { + places: vec![JobPlace { + location: Location::Reference { index }, + duration, + times: Some(vec![vec![times.0, times.1]]), + tag: None, + }], + demand: None, + order: None, + }]), + ..create_job(index.to_string().as_str()) + }; + + let problem = Problem { + plan: Plan { + jobs: vec![ + create_test_job(0, 10800., (format_time(0.), format_time(86399.))), + create_test_job(1, 3600., (format_time(81000.), format_time(81000.))), + create_test_job(2, 1800., (format_time(86400. + 900.), format_time(86400. + 900.))), + create_test_job(3, 5400., (format_time(75600.), format_time(75600.))), + create_test_job(4, 1800., (format_time(86400. + 2700.), format_time(86400. + 2700.))), + ], + relations: Some(vec![Relation { + type_field: RelationType::Sequence, + jobs: to_strings(vec!["3", "1", "2", "4"]), + vehicle_id: "my_vehicle_1".to_string(), + shift_index: Some(0), + }]), + ..create_empty_plan() + }, + fleet: Fleet { + vehicles: vec![VehicleType { + shifts: vec![VehicleShift { + start: ShiftStart { + earliest: format_time(86400. + 28800.), + latest: Some(format_time(86400. + 28800.)), + location: Location::Reference { index: 5 }, + }, + end: Some(ShiftEnd { + earliest: None, + latest: format_time(86400. + 57600.), + location: Location::Reference { index: 5 }, + }), + breaks: Some(vec![VehicleBreak::Required { + time: VehicleRequiredBreakTime::OffsetTime { earliest: 15303., latest: 15303. 
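+                            // NOTE: an exact-offset break (earliest == latest) chosen so that it
+                            // conflicts with the strict sequence relation above - this combination
+                            // is what reproduces the crash this test targets.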
}, + duration: 1800., + }]), + ..create_default_vehicle_shift() + }], + ..create_default_vehicle_type() + }], + ..create_default_fleet() + }, + ..create_empty_problem() + }; + + let matrix = Matrix { + profile: Some("car".to_string()), + timestamp: None, + travel_times: vec![ + 0, 635, 24, 580, 27, 2232, 625, 0, 650, 76, 653, 2507, 24, 660, 0, 605, 3, 2257, 570, 95, 595, 0, 598, + 2449, 27, 663, 3, 608, 0, 2260, 2232, 2545, 2257, 2515, 2260, 0, + ], + distances: vec![ + 0, 8888, 192, 8510, 215, 52931, 8896, 0, 9088, 450, 9111, 56579, 192, 9080, 0, 8702, 23, 53123, 8518, 450, + 8710, 0, 8733, 60163, 215, 9103, 23, 8725, 0, 53146, 52996, 56684, 53188, 60477, 53211, 0, + ], + error_codes: None, + }; + + let solution = solve_with_metaheuristic(problem, Some(vec![matrix])); + + // Basic assertion - no crash, solution should exist and have at least one tour + assert!(!solution.tours.is_empty()); +} From 110a8125b11f0515cc808a54907d757df314f2ea Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 12:31:44 +0100 Subject: [PATCH 03/30] Fix crash with required break and relations --- .../enablers/only_vehicle_activity_cost.rs | 5 +++-- .../construction/enablers/reserved_time.rs | 19 ++++++++++------- .../construction/enablers/schedule_update.rs | 5 +++-- .../src/construction/features/fast_service.rs | 3 ++- .../src/construction/features/transport.rs | 21 ++++++++++++++----- vrp-core/src/models/problem/costs.rs | 17 +++++++++------ .../tests/helpers/models/problem/costs.rs | 5 +++-- .../enablers/reserved_time_test.rs | 1 + .../tests/features/breaks/required_break.rs | 7 +++++-- 9 files changed, 55 insertions(+), 28 deletions(-) diff --git a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs index 2f9a17816..9e884c65d 100644 --- a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs +++ b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs @@ -2,6 +2,7 @@ use crate::models::common::{Cost, Timestamp}; use crate::models::problem::{ActivityCost, SimpleActivityCost}; use crate::models::solution::Activity; use crate::models::solution::Route; +use std::ops::ControlFlow; /// Uses costs only for a vehicle ignoring costs of a driver. 
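 /// Departure and arrival estimates are delegated to an inner `SimpleActivityCost`,
 /// so the `ControlFlow` semantics match the default implementation.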
#[derive(Default)] @@ -19,11 +20,11 @@ impl ActivityCost for OnlyVehicleActivityCost { waiting * actor.vehicle.costs.per_waiting_time + service * actor.vehicle.costs.per_service_time } - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/src/construction/enablers/reserved_time.rs b/vrp-core/src/construction/enablers/reserved_time.rs index 860cacc13..dc7eb0613 100644 --- a/vrp-core/src/construction/enablers/reserved_time.rs +++ b/vrp-core/src/construction/enablers/reserved_time.rs @@ -5,8 +5,9 @@ mod reserved_time_test; use crate::models::common::*; use crate::models::problem::{ActivityCost, Actor, TransportCost, TravelTime}; use crate::models::solution::{Activity, Route}; -use rosomaxa::prelude::{Float, GenericError}; +use rosomaxa::prelude::GenericError; use std::collections::HashMap; +use std::ops::ControlFlow; use std::sync::Arc; /// Represent a reserved time span entity. @@ -54,12 +55,12 @@ impl DynamicActivityCost { } impl ActivityCost for DynamicActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { let activity_start = arrival.max(activity.place.time.start); let departure = activity_start + activity.place.duration; let schedule = TimeWindow::new(arrival, departure); - (self.reserved_times_fn)(route, &schedule).map_or(departure, |reserved_time| { + (self.reserved_times_fn)(route, &schedule).map_or(ControlFlow::Continue(departure), |reserved_time| { // NOTE we ignore reserved_time.time.start and consider the latest possible time only let reserved_tw = &reserved_time.time; let reserved_tw = TimeWindow::new(reserved_tw.end, reserved_tw.end + reserved_time.duration); @@ -81,19 +82,21 @@ impl ActivityCost for DynamicActivityCost { if activity_start + extra_duration > activity.place.time.end { // TODO this branch is the reason why departure rescheduling is disabled. 
// theoretically, rescheduling should be aware somehow about dynamic costs - Float::MAX + ControlFlow::Break(departure + extra_duration) } else { - departure + extra_duration + ControlFlow::Continue(departure + extra_duration) } }) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { let arrival = activity.place.time.end.min(departure - activity.place.duration); let schedule = TimeWindow::new(arrival, departure); - (self.reserved_times_fn)(route, &schedule) - .map_or(arrival, |reserved_time| (arrival - reserved_time.duration).max(activity.place.time.start)) + let value = (self.reserved_times_fn)(route, &schedule) + .map_or(arrival, |reserved_time| (arrival - reserved_time.duration).max(activity.place.time.start)); + + ControlFlow::Continue(value) } } diff --git a/vrp-core/src/construction/enablers/schedule_update.rs b/vrp-core/src/construction/enablers/schedule_update.rs index 2443840a4..a64770795 100644 --- a/vrp-core/src/construction/enablers/schedule_update.rs +++ b/vrp-core/src/construction/enablers/schedule_update.rs @@ -3,6 +3,7 @@ use crate::models::OP_START_MSG; use crate::models::common::{Distance, Duration, Schedule, Timestamp}; use crate::models::problem::{ActivityCost, TransportCost, TravelTime}; use rosomaxa::prelude::Float; +use rosomaxa::utils::UnwrapValue; custom_activity_state!(pub(crate) LatestArrival typeof Timestamp); custom_activity_state!(pub(crate) WaitingTime typeof Timestamp); @@ -41,7 +42,7 @@ fn update_schedules(route_ctx: &mut RouteContext, activity: &dyn ActivityCost, t let a = route_ctx.route().tour.get(activity_idx).unwrap(); let location = a.place.location; let arrival = dep + transport.duration(route_ctx.route(), loc, location, TravelTime::Departure(dep)); - let departure = activity.estimate_departure(route_ctx.route(), a, arrival); + let departure = activity.estimate_departure(route_ctx.route(), a, arrival).unwrap_value(); (location, arrival, departure) }; @@ -83,7 +84,7 @@ fn update_states(route_ctx: &mut RouteContext, activity: &dyn ActivityCost, tran } else { let latest_departure = end_time - transport.duration(route, act.place.location, prev_loc, TravelTime::Arrival(end_time)); - activity.estimate_arrival(route, act, latest_departure) + activity.estimate_arrival(route, act, latest_departure).unwrap_value() }; let future_waiting = waiting + (act.place.time.start - act.schedule.arrival).max(0.); diff --git a/vrp-core/src/construction/features/fast_service.rs b/vrp-core/src/construction/features/fast_service.rs index f80e1213c..2e8934bd9 100644 --- a/vrp-core/src/construction/features/fast_service.rs +++ b/vrp-core/src/construction/features/fast_service.rs @@ -4,6 +4,7 @@ mod fast_service_test; use super::*; use crate::construction::enablers::{calculate_travel, calculate_travel_delta}; +use rosomaxa::utils::UnwrapValue; use std::collections::HashMap; /// Provides the way to build fast service feature. 
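// A minimal sketch (hypothetical caller, not taken from this patch series) of how the two
// `ControlFlow` patterns above are consumed: relaxed paths accept either variant via
// rosomaxa's `UnwrapValue`, while hard feasibility checks pattern-match and turn
// `Break` into a constraint violation:
//
//     use std::ops::ControlFlow;
//
//     fn departure_or_violation(estimate: ControlFlow<f64, f64>) -> Result<f64, &'static str> {
//         match estimate {
//             // feasible: continue scheduling with the estimated timestamp
//             ControlFlow::Continue(timestamp) => Ok(timestamp),
//             // infeasible (e.g. activity time window exceeded): report a violation
//             ControlFlow::Break(_) => Err("time window violation"),
//         }
//     }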
@@ -189,7 +190,7 @@ impl FastServiceObjective { let (_, (prev_to_tar_dur, _)) = calculate_travel(route_ctx, activity_ctx, self.transport.as_ref()); let arrival = activity_ctx.prev.schedule.departure + prev_to_tar_dur; - self.activity.estimate_departure(route_ctx.route(), activity_ctx.target, arrival) + self.activity.estimate_departure(route_ctx.route(), activity_ctx.target, arrival).unwrap_value() } fn get_cost_for_multi_job(&self, route_ctx: &RouteContext, activity_ctx: &ActivityContext) -> Cost { diff --git a/vrp-core/src/construction/features/transport.rs b/vrp-core/src/construction/features/transport.rs index 39b6291e5..0aa328cac 100644 --- a/vrp-core/src/construction/features/transport.rs +++ b/vrp-core/src/construction/features/transport.rs @@ -4,11 +4,14 @@ #[path = "../../../tests/unit/construction/features/transport_test.rs"] mod transport_test; +use std::ops::ControlFlow; + use super::*; use crate::construction::enablers::*; use crate::models::common::Timestamp; use crate::models::problem::{ActivityCost, Single, TransportCost, TravelTime}; use crate::models::solution::Activity; +use rosomaxa::utils::UnwrapValue; // TODO // remove get_total_cost, get_route_costs, get_max_cost methods from contexts @@ -226,8 +229,12 @@ impl TransportConstraint { TravelTime::Arrival(latest_arr_time_at_next), ); - let latest_arr_time_at_target = - target.place.time.end.min(self.activity.estimate_arrival(route, target, latest_departure_at_target)); + let ControlFlow::Continue(latest_arr_time_at_target) = + self.activity.estimate_arrival(route, target, latest_departure_at_target) + else { + return ConstraintViolation::skip(self.time_window_code); + }; + let latest_arr_time_at_target = target.place.time.end.min(latest_arr_time_at_target); if arr_time_at_target > latest_arr_time_at_target { return ConstraintViolation::skip(self.time_window_code); @@ -237,7 +244,11 @@ impl TransportConstraint { return ConstraintViolation::success(); } - let end_time_at_target = self.activity.estimate_departure(route, target, arr_time_at_target); + let ControlFlow::Continue(end_time_at_target) = + self.activity.estimate_departure(route, target, arr_time_at_target) + else { + return ConstraintViolation::skip(self.time_window_code); + }; let arr_time_at_next = end_time_at_target + self.transport.duration( @@ -335,7 +346,7 @@ where let (prev_target, dep_time_target) = { let time = activity_ctx.prev.schedule.departure; let arrival = time + transport.duration(route, prev, target, prev_dep); - let departure = activity.estimate_departure(route, activity_ctx.target, arrival); + let departure = activity.estimate_departure(route, activity_ctx.target, arrival).unwrap_value(); (estimate_fn(prev, target, prev_dep), departure) }; @@ -429,7 +440,7 @@ impl CostObjective { let arrival = time + self.transport.duration(route, start.place.location, end.place.location, TravelTime::Departure(time)); - let departure = self.activity.estimate_departure(route, end, arrival); + let departure = self.activity.estimate_departure(route, end, arrival).unwrap_value(); let transport_cost = self.transport.cost(route, start.place.location, end.place.location, TravelTime::Departure(time)); diff --git a/vrp-core/src/models/problem/costs.rs b/vrp-core/src/models/problem/costs.rs index c6c032081..2c515dd29 100644 --- a/vrp-core/src/models/problem/costs.rs +++ b/vrp-core/src/models/problem/costs.rs @@ -7,6 +7,7 @@ use crate::models::solution::{Activity, Route}; use rosomaxa::prelude::{Float, GenericError, GenericResult}; use rosomaxa::utils::CollectGroupBy; use 
std::collections::HashMap; +use std::ops::ControlFlow; use std::sync::Arc; /// Specifies a travel time type. @@ -32,10 +33,14 @@ pub trait ActivityCost: Send + Sync { } /// Estimates departure time for activity and actor at given arrival time. - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp; + /// Returns `ControlFlow::Continue(timestamp)` if the departure time is feasible, + /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow; /// Estimates arrival time for activity and actor at given departure time. - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp; + /// Returns `ControlFlow::Continue(timestamp)` if the arrival time is feasible, + /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow; } /// An actor independent activity costs. @@ -43,12 +48,12 @@ pub trait ActivityCost: Send + Sync { pub struct SimpleActivityCost {} impl ActivityCost for SimpleActivityCost { - fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { - arrival.max(activity.place.time.start) + activity.place.duration + fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + ControlFlow::Continue(arrival.max(activity.place.time.start) + activity.place.duration) } - fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { - activity.place.time.end.min(departure - activity.place.duration) + fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + ControlFlow::Continue(activity.place.time.end.min(departure - activity.place.duration)) } } diff --git a/vrp-core/tests/helpers/models/problem/costs.rs b/vrp-core/tests/helpers/models/problem/costs.rs index 7873343f3..31df10824 100644 --- a/vrp-core/tests/helpers/models/problem/costs.rs +++ b/vrp-core/tests/helpers/models/problem/costs.rs @@ -2,6 +2,7 @@ use crate::models::common::{Distance, Duration, Location, Profile, Timestamp}; use crate::models::problem::{ActivityCost, SimpleActivityCost, TransportCost, TravelTime}; use crate::models::solution::{Activity, Route}; use rosomaxa::prelude::Float; +use std::ops::ControlFlow; use std::sync::Arc; #[derive(Default)] @@ -45,11 +46,11 @@ pub struct TestActivityCost { } impl ActivityCost for TestActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> Timestamp { + fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> Timestamp { + fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs b/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs index 2efadcbc6..f844293d2 100644 --- a/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs +++ b/vrp-core/tests/unit/construction/enablers/reserved_time_test.rs @@ -7,6 +7,7 @@ use 
crate::helpers::models::problem::*; use crate::helpers::models::solution::*; use crate::models::problem::*; use crate::models::{Feature, ViolationCode}; +use rosomaxa::prelude::Float; const VIOLATION_CODE: ViolationCode = ViolationCode(1); diff --git a/vrp-pragmatic/tests/features/breaks/required_break.rs b/vrp-pragmatic/tests/features/breaks/required_break.rs index d8e52d3a3..1219ef2ac 100644 --- a/vrp-pragmatic/tests/features/breaks/required_break.rs +++ b/vrp-pragmatic/tests/features/breaks/required_break.rs @@ -326,8 +326,11 @@ fn can_handle_required_break_with_infeasible_sequence_relation() { error_codes: None, }; - let solution = solve_with_metaheuristic(problem, Some(vec![matrix])); + let solution = solve_with_metaheuristic_and_iterations_without_check(problem, Some(vec![matrix]), 200); - // Basic assertion - no crash, solution should exist and have at least one tour + // With proper constraint handling via ControlFlow, job "0" (with very wide time window) cannot be + // scheduled due to the required break at specific time conflicting with the strict sequence relation. assert!(!solution.tours.is_empty()); + assert_eq!(solution.unassigned.as_ref().map(|u| u.len()), Some(1)); + assert_eq!(solution.unassigned.as_ref().and_then(|u| u.first()).map(|j| j.job_id.as_str()), Some("0")); } From ca239990db19d0b41c6da3c09cf2d35cb44d053c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 25 Dec 2025 13:05:11 +0100 Subject: [PATCH 04/30] Improve format_time error handling --- vrp-pragmatic/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vrp-pragmatic/src/lib.rs b/vrp-pragmatic/src/lib.rs index 56e6fe63f..a3b6160bf 100644 --- a/vrp-pragmatic/src/lib.rs +++ b/vrp-pragmatic/src/lib.rs @@ -47,8 +47,10 @@ pub fn get_unique_locations(problem: &Problem) -> Vec { } fn format_time(time: Float) -> String { - // TODO avoid using implicitly unwrap - OffsetDateTime::from_unix_timestamp(time as i64).map(|time| time.format(&Rfc3339).unwrap()).unwrap() + OffsetDateTime::from_unix_timestamp(time as i64) + .map_err(|err| format!("Invalid timestamp {}: {}", time, err)) + .and_then(|time| time.format(&Rfc3339).map_err(|err| format!("Format error: {}", err))) + .unwrap() } fn parse_time(time: &str) -> Float { From 12e66bf9feab93806a040b48534a985ec2fd17c3 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 26 Dec 2025 12:34:31 +0100 Subject: [PATCH 05/30] Update to rust 1.92 --- Dockerfile | 4 +-- vrp-cli/tests/unit/commands/solve_test.rs | 2 +- .../enablers/only_vehicle_activity_cost.rs | 14 ++++++++-- .../construction/enablers/reserved_time.rs | 14 ++++++++-- vrp-core/src/models/problem/costs.rs | 28 ++++++++++++++++--- .../tests/helpers/models/problem/costs.rs | 14 ++++++++-- vrp-pragmatic/src/format/solution/mod.rs | 8 ++---- 7 files changed, 65 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index a375e5c92..3753e06b3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.88-alpine AS Builder +FROM rust:1.92-alpine AS builder LABEL maintainer="Ilya Builuk " \ org.opencontainers.image.title="A Vehicle Routing Problem solver CLI" \ @@ -29,6 +29,6 @@ FROM alpine:3.18 ENV SOLVER_DIR=/solver RUN mkdir $SOLVER_DIR -COPY --from=Builder /src/target/release/vrp-cli $SOLVER_DIR/vrp-cli +COPY --from=builder /src/target/release/vrp-cli $SOLVER_DIR/vrp-cli WORKDIR $SOLVER_DIR diff --git a/vrp-cli/tests/unit/commands/solve_test.rs b/vrp-cli/tests/unit/commands/solve_test.rs index 68cf9fa40..b350cbb66 100644 --- 
a/vrp-cli/tests/unit/commands/solve_test.rs +++ b/vrp-cli/tests/unit/commands/solve_test.rs @@ -161,7 +161,7 @@ fn can_use_init_size() { #[test] fn can_specify_cv() { - for (params, result) in vec![ + for (params, result) in [ (vec!["--min-cv", "sample,200,0.05,true"], Ok(Some(("sample".to_string(), 200, 0.05, true)))), (vec!["--min-cv", "period,100,0.01,false"], Ok(Some(("period".to_string(), 100, 0.01, false)))), (vec!["--min-cv", "sample,200,0,tru"], Err("cannot parse min_cv parameter".into())), diff --git a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs index 9e884c65d..87f4f4ca4 100644 --- a/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs +++ b/vrp-core/src/construction/enablers/only_vehicle_activity_cost.rs @@ -20,11 +20,21 @@ impl ActivityCost for OnlyVehicleActivityCost { waiting * actor.vehicle.costs.per_waiting_time + service * actor.vehicle.costs.per_service_time } - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-core/src/construction/enablers/reserved_time.rs b/vrp-core/src/construction/enablers/reserved_time.rs index dc7eb0613..7e918e231 100644 --- a/vrp-core/src/construction/enablers/reserved_time.rs +++ b/vrp-core/src/construction/enablers/reserved_time.rs @@ -55,7 +55,12 @@ impl DynamicActivityCost { } impl ActivityCost for DynamicActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { let activity_start = arrival.max(activity.place.time.start); let departure = activity_start + activity.place.duration; let schedule = TimeWindow::new(arrival, departure); @@ -89,7 +94,12 @@ impl ActivityCost for DynamicActivityCost { }) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { let arrival = activity.place.time.end.min(departure - activity.place.duration); let schedule = TimeWindow::new(arrival, departure); diff --git a/vrp-core/src/models/problem/costs.rs b/vrp-core/src/models/problem/costs.rs index 2c515dd29..2fe526c4e 100644 --- a/vrp-core/src/models/problem/costs.rs +++ b/vrp-core/src/models/problem/costs.rs @@ -35,12 +35,22 @@ pub trait ActivityCost: Send + Sync { /// Estimates departure time for activity and actor at given arrival time. /// Returns `ControlFlow::Continue(timestamp)` if the departure time is feasible, /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow; + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow; /// Estimates arrival time for activity and actor at given departure time. 
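    /// (Inverse of `estimate_departure`: given the departure time, it derives the latest feasible arrival.)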
/// Returns `ControlFlow::Continue(timestamp)` if the arrival time is feasible, /// or `ControlFlow::Break(timestamp)` if constraints are violated (e.g., time window infeasible). - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow; + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow; } /// An actor independent activity costs. @@ -48,11 +58,21 @@ pub trait ActivityCost: Send + Sync { pub struct SimpleActivityCost {} impl ActivityCost for SimpleActivityCost { - fn estimate_departure(&self, _: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + _: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { ControlFlow::Continue(arrival.max(activity.place.time.start) + activity.place.duration) } - fn estimate_arrival(&self, _: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + _: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { ControlFlow::Continue(activity.place.time.end.min(departure - activity.place.duration)) } } diff --git a/vrp-core/tests/helpers/models/problem/costs.rs b/vrp-core/tests/helpers/models/problem/costs.rs index 31df10824..4b91ff86b 100644 --- a/vrp-core/tests/helpers/models/problem/costs.rs +++ b/vrp-core/tests/helpers/models/problem/costs.rs @@ -46,11 +46,21 @@ pub struct TestActivityCost { } impl ActivityCost for TestActivityCost { - fn estimate_departure(&self, route: &Route, activity: &Activity, arrival: Timestamp) -> ControlFlow { + fn estimate_departure( + &self, + route: &Route, + activity: &Activity, + arrival: Timestamp, + ) -> ControlFlow { self.inner.estimate_departure(route, activity, arrival) } - fn estimate_arrival(&self, route: &Route, activity: &Activity, departure: Timestamp) -> ControlFlow { + fn estimate_arrival( + &self, + route: &Route, + activity: &Activity, + departure: Timestamp, + ) -> ControlFlow { self.inner.estimate_arrival(route, activity, departure) } } diff --git a/vrp-pragmatic/src/format/solution/mod.rs b/vrp-pragmatic/src/format/solution/mod.rs index fae7280fa..825195cab 100644 --- a/vrp-pragmatic/src/format/solution/mod.rs +++ b/vrp-pragmatic/src/format/solution/mod.rs @@ -38,8 +38,10 @@ type DomainLocation = vrp_core::models::common::Location; type DomainExtras = vrp_core::models::Extras; /// Specifies possible options for solution output. +#[derive(Default)] pub enum PragmaticOutputType { /// Only pragmatic is needed. + #[default] OnlyPragmatic, /// Only geojson is needed. OnlyGeoJson, @@ -47,12 +49,6 @@ pub enum PragmaticOutputType { Combined, } -impl Default for PragmaticOutputType { - fn default() -> Self { - Self::OnlyPragmatic - } -} - /// Writes solution in pragmatic format variation defined by output type argument. pub fn write_pragmatic( problem: &DomainProblem, From 7b86780237f9a0106026d18b982b71375fc41095 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 00:05:09 +0100 Subject: [PATCH 06/30] Improve SISR implementation --- CHANGELOG.md | 1 + .../search/recreate/recreate_with_blinks.rs | 312 +++++++++++++++--- 2 files changed, 261 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e053fac4..a5b330fad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ are already published. So, I stick to it for now. 
* improved remedian algorithm * separate calculations for distance/duration from cost minimization * change GSOM distance function +* improved SISR implementation ### Added diff --git a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs index 961233634..286a551b9 100644 --- a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs +++ b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs @@ -1,26 +1,179 @@ -use crate::construction::heuristics::InsertionContext; use crate::construction::heuristics::*; +use crate::construction::heuristics::{InsertionContext, JobSelector}; use crate::models::Problem; -use crate::models::common::*; use crate::models::problem::Job; +use crate::prelude::Cost; use crate::solver::RefinementContext; use crate::solver::search::recreate::Recreate; use rosomaxa::prelude::*; +use rosomaxa::utils::fold_reduce; use std::sync::Arc; -struct ChunkJobSelector { - size: usize, +/// A recreate method as described in "Slack Induction by String Removals for +/// Vehicle Routing Problems" (aka SISR) paper by Jan Christiaens, Greet Vanden Berghe. +pub struct RecreateWithBlinks { + job_selectors: Vec>, + route_selector: Box, + leg_selection: LegSelection, + result_selector: Box, + insertion_heuristic: InsertionHeuristic, + weights: Vec, +} + +impl RecreateWithBlinks { + /// Creates a new instance of `RecreateWithBlinks`. + pub fn new(selectors: Vec<(Box, usize)>, blink_ratio: f64, random: Arc) -> Self { + let weights = selectors.iter().map(|(_, weight)| *weight).collect(); + let evaluator = Box::new(BlinkInsertionEvaluator::new(blink_ratio, random.clone())); + + Self { + job_selectors: selectors.into_iter().map(|(selector, _)| selector).collect(), + route_selector: Box::::default(), + leg_selection: LegSelection::Exhaustive, + result_selector: Box::::default(), + insertion_heuristic: InsertionHeuristic::new(evaluator), + weights, + } + } + + /// Creates a new instance with defaults compliant with SISR paper (Section 5.3 + Table 13). + pub fn new_with_defaults(random: Arc) -> Self { + let blink_ratio = 0.01; + + // Helper to wrap selectors with SingletonJobSelector + fn singleton(selector: T) -> Box { + Box::new(SingletonJobSelector(selector)) + } + + Self::new( + vec![ + // --- Core Selectors (Section 5.3) --- + // 1. Random (Weight: 4) + (singleton(AllJobSelector::default()), 4), + // 2. Demand: Largest First (Weight: 4) + (singleton(DemandJobSelector::new(true)), 4), + // 3. Far: Largest Distance First (Weight: 2) + (singleton(RankedJobSelector::new(true)), 2), + // 4. Close: Smallest Distance First (Weight: 1) + (singleton(RankedJobSelector::new(false)), 1), + // --- VRPTW Extensions (Table 13) --- + // 5. TW Length: Increasing (Shortest/Hardest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::LengthAscending)), 2), + // 6. TW Start: Increasing (Earliest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::StartAscending)), 2), + // 7. 
TW End: Decreasing (Latest first) (Weight: 2) + (singleton(TimeWindowJobSelector::new(TimeWindowSelectionMode::EndDescending)), 2), + ], + blink_ratio, + random, + ) + } +} + +impl Recreate for RecreateWithBlinks { + fn run(&self, _: &RefinementContext, insertion_ctx: InsertionContext) -> InsertionContext { + let index = insertion_ctx.environment.random.weighted(self.weights.as_slice()); + let job_selector = self.job_selectors.get(index).unwrap().as_ref(); + + self.insertion_heuristic.process( + insertion_ctx, + job_selector, + self.route_selector.as_ref(), + &self.leg_selection, + self.result_selector.as_ref(), + ) + } +} + +struct BlinkInsertionEvaluator { + blink_ratio: f64, + random: Arc, } -impl ChunkJobSelector { - pub fn new(size: usize) -> Self { - Self { size } +impl BlinkInsertionEvaluator { + pub fn new(blink_ratio: f64, random: Arc) -> Self { + Self { blink_ratio, random } + } +} + +impl InsertionEvaluator for BlinkInsertionEvaluator { + fn evaluate_all( + &self, + insertion_ctx: &InsertionContext, + jobs: &[&Job], + routes: &[&RouteContext], + _leg_selection: &LegSelection, + result_selector: &dyn ResultSelector, + ) -> InsertionResult { + // With SingletonJobSelector, this is guaranteed to be the single target job. + let job = match jobs.first() { + Some(job) => job, + None => return InsertionResult::make_failure(), + }; + + let eval_ctx = EvaluationContext { + goal: &insertion_ctx.problem.goal, + job, + leg_selection: &LegSelection::Exhaustive, + result_selector, + }; + + let result = fold_reduce( + routes, + InsertionResult::make_failure, + |best_in_thread, route_ctx| { + let mut best_in_route = best_in_thread; + let tour = &route_ctx.route().tour; + + for leg_index in 0..tour.legs().count() { + // The Blink: "Each position is evaluated with a probability of 1 - gamma" + if self.random.is_hit(self.blink_ratio) { + continue; + } + + // Evaluate specific position + let result = eval_job_insertion_in_route( + insertion_ctx, + &eval_ctx, + route_ctx, + InsertionPosition::Concrete(leg_index), + best_in_route, + ); + + best_in_route = result; + } + + best_in_route + }, + // Reduce: Merge results from threads (Greedy/Best selection) + |a, b| InsertionResult::choose_best_result(a, b), + ); + + match result { + InsertionResult::Success(_) => result, + InsertionResult::Failure(mut failure) => { + // If we failed to insert, we must blame the specific job we were trying + // to insert. Otherwise, the heuristic thinks the issue is systemic (no routes) + // and might abort entirely. + if failure.job.is_none() { + failure.job = Some((*job).clone()); + } + InsertionResult::Failure(failure) + } + } } } -impl JobSelector for ChunkJobSelector { - fn select<'a>(&'a self, insertion_ctx: &'a InsertionContext) -> Box + 'a> { - Box::new(insertion_ctx.solution.required.iter().take(self.size)) +struct SingletonJobSelector(T); + +impl JobSelector for SingletonJobSelector { + fn prepare(&self, ctx: &mut InsertionContext) { + self.0.prepare(ctx); + } + + fn select<'a>(&'a self, ctx: &'a InsertionContext) -> Box + 'a> { + // Only yield the first job: this forces InsertionHeuristic to align with BlinkInsertionEvaluator. + Box::new(self.0.select(ctx).take(1)) } } @@ -59,56 +212,111 @@ impl JobSelector for RankedJobSelector { } } -/// A recreate method as described in "Slack Induction by String Removals for -/// Vehicle Routing Problems" (aka SISR) paper by Jan Christiaens, Greet Vanden Berghe. 
-pub struct RecreateWithBlinks {
-    job_selectors: Vec>,
-    route_selector: Box,
-    leg_selection: LegSelection,
-    result_selector: Box,
-    insertion_heuristic: InsertionHeuristic,
-    weights: Vec,
+struct DemandJobSelector {
+    asc_order: bool,
 }

-impl RecreateWithBlinks {
-    /// Creates a new instance of `RecreateWithBlinks`.
-    pub fn new(selectors: Vec<(Box, usize)>, random: Arc) -> Self {
-        let weights = selectors.iter().map(|(_, weight)| *weight).collect();
-        Self {
-            job_selectors: selectors.into_iter().map(|(selector, _)| selector).collect(),
-            route_selector: Box::::default(),
-            leg_selection: LegSelection::Stochastic(random.clone()),
-            result_selector: Box::new(BlinkResultSelector::new_with_defaults(random)),
-            insertion_heuristic: Default::default(),
-            weights,
+impl DemandJobSelector {
+    pub fn new(asc_order: bool) -> Self {
+        Self { asc_order }
+    }
+}
+
+impl JobSelector for DemandJobSelector {
+    fn prepare(&self, insertion_ctx: &mut InsertionContext) {
+        insertion_ctx.solution.required.sort_by(|a, b| {
+            use crate::construction::features::JobDemandDimension;
+            use crate::models::common::{MultiDimLoad, SingleDimLoad};
+
+            // Get total demand size (sum of all pickup and delivery components)
+            // Try SingleDimLoad first, then MultiDimLoad (only one type per problem)
+            let get_demand = |job: &Job| -> i32 {
+                let dimens = job.dimens();
+
+                if let Some(demand) = dimens.get_job_demand::() {
+                    demand.pickup.0.value + demand.pickup.1.value + demand.delivery.0.value + demand.delivery.1.value
+                } else if let Some(demand) = dimens.get_job_demand::() {
+                    let sum_load = |load: &MultiDimLoad| load.load[..load.size].iter().sum::();
+                    sum_load(&demand.pickup.0)
+                        + sum_load(&demand.pickup.1)
+                        + sum_load(&demand.delivery.0)
+                        + sum_load(&demand.delivery.1)
+                } else {
+                    0
+                }
+            };
+
+            get_demand(a).cmp(&get_demand(b))
+        });
+
+        if !self.asc_order {
+            insertion_ctx.solution.required.reverse();
+        }
+    }
+}

-    /// Creates a new instance of `RecreateWithBlinks` with default prameters.
-    pub fn new_with_defaults(random: Arc) -> Self {
-        Self::new(
-            vec![
-                (Box::::default(), 10),
-                (Box::new(ChunkJobSelector::new(8)), 10),
-                (Box::new(RankedJobSelector::new(true)), 5),
-                (Box::new(RankedJobSelector::new(false)), 1),
-            ],
-            random,
-        )
+/// Specifies the sorting criterion for Time Windows (Table 13 of SISR paper).
+pub enum TimeWindowSelectionMode {
+    /// Sort by time window duration (End - Start).
+    /// Paper: "Increasing time window length"
+    LengthAscending,
+
+    /// Sort by start time.
+    /// Paper: "Increasing time window start"
+    StartAscending,
+
+    /// Sort by end time.
+    /// Paper: "Decreasing time window end"
+    EndDescending,
+}
+
+pub struct TimeWindowJobSelector {
+    mode: TimeWindowSelectionMode,
+}
+
+impl TimeWindowJobSelector {
+    pub fn new(mode: TimeWindowSelectionMode) -> Self {
+        Self { mode }
+    }
+
+    /// Helper to extract the relevant time window metric from a job.
+    /// Returns (min_start, max_end, min_length).
+    /// Handles multiple time windows by finding the extreme boundaries.
+    fn get_tw_metric(job: &Job) -> (f64, f64, f64) {
+        job.places()
+            .flat_map(|place| place.times.iter())
+            // NOTE: ignore non-time-window spans; this is the simplest way to handle
+            // the case when multiple span types are mixed.
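+            // (jobs without any time window fall back to the permissive defaults in `unwrap_or` below)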
+ .filter_map(|time| time.as_time_window()) + .fold(None, |acc: Option<(f64, f64, f64)>, tw| { + let length = tw.end - tw.start; + Some(match acc { + None => (tw.start, tw.end, length), + Some((min_start, max_end, min_length)) => { + (min_start.min(tw.start), max_end.max(tw.end), min_length.min(length)) + } + }) + }) + .unwrap_or((0.0, f64::MAX, f64::MAX)) } } -impl Recreate for RecreateWithBlinks { - fn run(&self, _: &RefinementContext, insertion_ctx: InsertionContext) -> InsertionContext { - let index = insertion_ctx.environment.random.weighted(self.weights.as_slice()); - let job_selector = self.job_selectors.get(index).unwrap().as_ref(); +impl JobSelector for TimeWindowJobSelector { + fn prepare(&self, insertion_ctx: &mut InsertionContext) { + insertion_ctx.solution.required.sort_by(|a, b| { + let (start_a, end_a, len_a) = Self::get_tw_metric(a); + let (start_b, end_b, len_b) = Self::get_tw_metric(b); - self.insertion_heuristic.process( - insertion_ctx, - job_selector, - self.route_selector.as_ref(), - &self.leg_selection, - self.result_selector.as_ref(), - ) + match self.mode { + // Increasing Length: Shortest windows first (hardest to fit) + TimeWindowSelectionMode::LengthAscending => len_a.total_cmp(&len_b), + + // Increasing Start: Earliest start first (chronological) + TimeWindowSelectionMode::StartAscending => start_a.total_cmp(&start_b), + + // Decreasing End: Latest end first (reverse chronological/urgency) + TimeWindowSelectionMode::EndDescending => end_b.total_cmp(&end_a), + } + }); } } From 12422e085391f9c8ebdf837c7b45efb7720aac73 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 00:25:22 +0100 Subject: [PATCH 07/30] Increase SISR probability --- vrp-core/src/solver/heuristic.rs | 41 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index daf8648d0..0a0762b81 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -399,13 +399,13 @@ mod statik { // initialize recreate let recreate = Arc::new(WeightedRecreate::new(vec![ - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 50), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 50), + (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 20), (Arc::new(RecreateWithRegret::new(2, 3, random.clone())), 20), (Arc::new(RecreateWithCheapest::new(random.clone())), 20), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 10), (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 5), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 5), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 5), (Arc::new(RecreateWithFarthest::new(random.clone())), 2), (Arc::new(RecreateWithSkipBest::new(4, 8, random.clone())), 2), (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), 1), @@ -430,7 +430,7 @@ mod statik { let ruin = Arc::new(WeightedRuin::new(vec![ ( Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(normal_limits.clone())), 1.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(normal_limits.clone())), 2.), (extra_random_job.clone(), 0.1), ])), 100, @@ -492,22 +492,23 @@ mod statik { mod dynamic { use super::*; - fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String)> { + fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ - 
(cheapest.clone(), "cheapest".to_string()), - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string()), - (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string()), - (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string()), - (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string()), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string()), - (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string()), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string()), + (cheapest.clone(), "cheapest".to_string(), 1.), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 3.), + (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string(), 1.), + (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string(), 1.), + (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), + (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string(), 1.), + (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string(), 1.), + (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string(), 1.), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest.clone(), random.clone())), "skip_random".to_string(), + 1., ), - (Arc::new(RecreateWithSlice::new(random.clone())), "slice".to_string()), + (Arc::new(RecreateWithSlice::new(random.clone())), "slice".to_string(), 1.), ] .into_iter() .chain( @@ -516,14 +517,14 @@ mod dynamic { move || RecreateWithCheapest::new(random.clone()) }) .enumerate() - .map(|(idx, recreate)| (recreate, format!("alternative_{idx}"))), + .map(|(idx, recreate)| (recreate, format!("alternative_{idx}"), 1.)), ) .collect() } fn get_ruins_with_limits(limits: RemovalLimits, prefix: &str) -> Vec<(Arc, String, Float)> { vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 2.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 7.), (Arc::new(NeighbourRemoval::new(limits.clone())), format!("{prefix}_neighbour_removal"), 5.), (Arc::new(WorstJobRemoval::new(4, limits.clone())), format!("{prefix}_worst_job"), 4.), (Arc::new(RandomJobRemoval::new(limits.clone())), format!("{prefix}_random_job_removal"), 4.), @@ -619,12 +620,12 @@ mod dynamic { recreates .iter() - .flat_map(|(recreate, recreate_name)| { - ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, weight)| { + .flat_map(|(recreate, recreate_name, recreate_weight)| { + ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, ruin_weight)| { ( Arc::new(RuinAndRecreate::new(ruin.clone(), recreate.clone())), format!("{ruin_name}+{recreate_name}"), - *weight, + ruin_weight * recreate_weight, ) }) }) @@ -644,11 +645,11 @@ mod dynamic { let cheapest = Arc::new(RecreateWithCheapest::new(random.clone())); let recreate = Arc::new(WeightedRecreate::new(vec![ (cheapest.clone(), 1), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 3), (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 1), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 1), (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 1), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 1), - 
(Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 1), (Arc::new(RecreateWithFarthest::new(random.clone())), 1), (Arc::new(RecreateWithSlice::new(random.clone())), 1), (Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest, random.clone())), 1), @@ -661,7 +662,7 @@ mod dynamic { let ruin = Arc::new(WeightedRuin::new(vec![ ( Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 1.), + (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 5.), (random_ruin.clone(), 0.1), ])), 1, From 10d46e919b46d88ed7f6ab892aa06890a3e0f1ca Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 19:19:58 +0100 Subject: [PATCH 08/30] Fix potential issue in lkh search --- vrp-core/src/algorithms/lkh/kopt.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vrp-core/src/algorithms/lkh/kopt.rs b/vrp-core/src/algorithms/lkh/kopt.rs index d95eb8ce0..db73b76fb 100644 --- a/vrp-core/src/algorithms/lkh/kopt.rs +++ b/vrp-core/src/algorithms/lkh/kopt.rs @@ -31,9 +31,17 @@ where /// Tries to optimize a given path using modified Lin-Kernighan-Helsgaun algorithm. /// Returns discovered solutions in the order of their improvement. pub fn optimize(mut self, path: Path) -> Vec { + // Scale with problem size: linear growth with safety cap to prevent infinite loops + let max_iterations = (path.len() * 10).min(2000); + self.solutions.push(path); + let mut iterations = 0; while let Some(improved_path) = self.solutions.last().and_then(|p| self.improve(p.iter().copied())) { + iterations += 1; + if iterations >= max_iterations { + break; + } self.solutions.clear(); self.solutions.push(improved_path); } From 5d3ce5cd38f4530a922fc05dc2820d59c40ca778 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 20:00:03 +0100 Subject: [PATCH 09/30] Apply some tweaks in dynamic heuristic --- rosomaxa/src/hyper/dynamic_selective.rs | 31 ++++++++++++------- .../unit/hyper/dynamic_selective_test.rs | 22 +++++-------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 0a911f77a..42fc3ac8c 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -159,7 +159,7 @@ where let duration = duration.as_millis() as usize; let base_reward = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - let reward_multiplier = estimate_reward_perf_multiplier(&context, duration, is_new_best); + let reward_multiplier = estimate_reward_perf_multiplier(&context, duration); let reward = base_reward * reward_multiplier; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; @@ -197,13 +197,24 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { + // Normalize weights to achievable reward scale to prevent optimistic bias locking + // Find max weight for scaling + let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); + + // Target max prior: rewards typically [0.75, 4.5], max theoretical ~9-10 + // 4.0 is optimistic but achievable for good operators + const TARGET_MAX_PRIOR: Float = 4.0; + let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; + let slot_machines = search_operators .into_iter() - .map(|(operator, name, _)| { - // TODO use initial weight as prior mean estimation? 
+ .map(|(operator, name, initial_weight)| { + // Scale weight to reward range while preserving hierarchy + // E.g., weight 21.0 -> 4.0, weight 2.0 -> 0.38 + let prior_mean = initial_weight * scale; ( SlotMachine::new( - 1., + prior_mean, SearchAction { operator, operator_name: name.to_string() }, DefaultDistributionSampler::new(environment.random.clone()), ), @@ -363,11 +374,7 @@ where /// Estimates performance of used operation based on its duration and overall improvement statistics. /// Returns a reward multiplier in `(~0.5, 3]` range. -fn estimate_reward_perf_multiplier( - search_ctx: &SearchContext, - duration: usize, - has_improvement: bool, -) -> Float +fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, O: HeuristicObjective, @@ -385,11 +392,11 @@ where _ => 1., // Moderato }; - let improvement_ratio = match (improvement_ratio, has_improvement) { + let improvement_ratio = match improvement_ratio { // stagnation: increase reward - (ratio, true) if ratio < 0.05 => 2., + ratio if ratio < 0.05 => 2., // fast convergence: decrease reward - (ratio, true) if ratio > 0.150 => 0.75, + ratio if ratio > 0.150 => 0.75, // moderate convergence _ => 1., }; diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 66285b424..7d4958e89 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -56,24 +56,18 @@ fn can_estimate_median() { assert!(median > 0); } -parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, has_improvement, expected), { - can_estimate_reward_multiplier_impl(approx_median, duration, has_improvement, expected); +parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, expected), { + can_estimate_reward_multiplier_impl(approx_median, duration, expected); }} can_estimate_reward_multiplier! 
{ - case_01_moderate: (Some(1), 1, false, 1.), - case_02_allegro: (Some(2), 1, false, 1.5), - case_03_allegretto: (Some(10), 8, false, 1.25), - case_04_andante: (Some(8), 13, false, 0.75), - case_05_moderato_improvement: (Some(1), 1, true, 2.), + case_01_moderate: (Some(1), 1, 2.0), // 1.0 (moderate) * 2.0 (stagnation bonus) + case_02_allegro: (Some(2), 1, 3.0), // 1.5 (fast) * 2.0 (stagnation bonus) + case_03_allegretto: (Some(10), 8, 2.5), // 1.25 (fast-ish) * 2.0 (stagnation bonus) + case_04_andante: (Some(8), 13, 1.5), // 0.75 (slow) * 2.0 (stagnation bonus) } -fn can_estimate_reward_multiplier_impl( - approx_median: Option, - duration: usize, - has_improvement: bool, - expected: Float, -) { +fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: usize, expected: Float) { let heuristic_ctx = create_default_heuristic_context(); let solution = VectorSolution::new(vec![], 0., vec![]); let search_ctx = SearchContext { @@ -84,7 +78,7 @@ fn can_estimate_reward_multiplier_impl( approx_median, }; - let result = estimate_reward_perf_multiplier(&search_ctx, duration, has_improvement); + let result = estimate_reward_perf_multiplier(&search_ctx, duration); assert_eq!(result, expected); } From 3c268c6dce899635568f1e57f3ce795b17c5602b Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Fri, 9 Jan 2026 23:39:33 +0100 Subject: [PATCH 10/30] Fix clippy warning --- vrp-core/src/solver/search/recreate/recreate_with_blinks.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs index 286a551b9..078cb8d0b 100644 --- a/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs +++ b/vrp-core/src/solver/search/recreate/recreate_with_blinks.rs @@ -146,7 +146,7 @@ impl InsertionEvaluator for BlinkInsertionEvaluator { best_in_route }, // Reduce: Merge results from threads (Greedy/Best selection) - |a, b| InsertionResult::choose_best_result(a, b), + InsertionResult::choose_best_result, ); match result { From afb26d501a3170c890112ada04a79fef1d7a00c7 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 00:41:25 +0100 Subject: [PATCH 11/30] Redesign dynamic selective heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 120 +++++++--- rosomaxa/src/hyper/dynamic_selective.rs | 217 ++++++++++++------ .../unit/hyper/dynamic_selective_test.rs | 16 +- 3 files changed, 252 insertions(+), 101 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index 2eca72c89..a3cac2732 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -4,11 +4,11 @@ mod slot_machine_test; use crate::utils::{DistributionSampler, Float}; -/// Represents an action on slot machine; +/// Represents an action on slot machine. pub trait SlotAction { /// An environment context. type Context; - /// A feedback from taking slot action. + /// A feedback from taking slot action. type Feedback: SlotFeedback; /// Take an action for given context and return reward. @@ -21,23 +21,27 @@ pub trait SlotFeedback { fn reward(&self) -> Float; } -/// Simulates a slot machine. -/// Internally tries to estimate reward probability distribution using one of methods from Thompson sampling. +/// Simulates a slot machine using Non-Stationary Thompson Sampling. 
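+/// (Each slot machine wraps a single search operator; the dynamic selective
+/// hyper-heuristic allocates trials among them.)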
+/// +/// This implementation uses a Normal-Inverse-Gamma (NIG) conjugate prior to model +/// the unknown mean and variance of the reward distribution. It employs exponential +/// decay (weighted likelihood) to handle non-stationary environments where the +/// effectiveness of operators changes over time (e.g., VRP search phases). #[derive(Clone)] pub struct SlotMachine { - /// The number of times this slot machine has been tried. + /// The number of times this slot machine has been used (telemetry only). n: usize, - /// Gamma shape parameter. + /// Shape parameter (α) of the Inverse-Gamma distribution (tracks sample count/confidence). alpha: Float, - /// Gamma rate parameter. + /// Rate parameter (β) of the Inverse-Gamma distribution (tracks sum of squared errors). beta: Float, - /// Estimated mean. + /// Estimated mean (μ) of the Normal distribution. mu: Float, - /// Estimated variance. + /// Estimated variance (E[σ²]) derived from α and β. v: Float, - /// Sampler: used to provide samples from underlying estimated distribution. + /// Sampler used to draw values from the estimated distribution. sampler: S, - /// Actual slot action function. + /// The actual action associated with this slot. action: A, } @@ -46,47 +50,109 @@ where A: SlotAction + Clone, S: DistributionSampler + Clone, { - /// Creates a new instance of `SlotMachine`. + /// Creates a new instance with Universal Priors. + /// + /// Since the `SearchAgent` externally normalizes rewards to standard scores (Z-scores), + /// we can use universal constants for the priors rather than problem-specific guesses. pub fn new(prior_mean: Float, action: A, sampler: S) -> Self { - let alpha = 1.; - let beta = 10.; - let mu = prior_mean; - let v = beta / (alpha + 1.); + // Universal priors for a Standard Normal distribution N(0, 1): + // Alpha = 2.0 implies a weak prior belief with mathematically defined variance. + // Beta = 1.0 combined with Alpha=2.0 implies an expected variance of ~1.0. + let alpha = 2.0; + let beta = 1.0; + + // Prior mean is clamped to a reasonable Z-score range (-1 to 1) to prevent + // initialization bias, though typically 0.0 is used for centered data. + let mu = prior_mean.min(1.0).max(-1.0); + + // Variance expectation for Inverse-Gamma: Beta / (Alpha - 1) + // With Alpha=2.0, this results in v=1.0. + let v = beta / (alpha - 1.0); Self { n: 0, alpha, beta, mu, v, action, sampler } } - /// Samples from estimated normal distribution. + /// Samples a reward prediction from the estimated Normal-Inverse-Gamma distribution. + /// + /// 1. Samples precision (τ) from Gamma(α, β). + /// 2. Samples reward from Normal(μ, 1/√(τ)). pub fn sample(&self) -> Float { + // Sample precision from Gamma distribution let precision = self.sampler.gamma(self.alpha, 1. / self.beta); + + // Safety: If precision is numerically zero (rare), fallback to high variance let precision = if precision == 0. || self.n == 0 { 0.001 } else { precision }; let variance = 1. / precision; self.sampler.normal(self.mu, variance.sqrt()) } - /// Plays a game by taking action within given context. As result, updates a slot state. + /// Plays the slot machine by executing the action within the given context. pub fn play(&self, context: A::Context) -> A::Feedback { self.action.take(context) } - /// Updates slot machine. + /// Updates the internal Bayesian state with a new reward observation. + /// + /// The update logic performs two key functions: + /// 1. **Decay:** Forgets old observations to adapt to the changing search landscape. + /// 2. 
**Bayesian Update:** Refines estimates of Mean and Variance using the new data. + /// + /// `reward` is expected to be a normalized relative value (e.g., success ≈ 1.0, failure = 0.0). pub fn update(&mut self, feedback: &A::Feedback) { let reward = feedback.reward(); - let n = 1.; - let v = self.n as Float; + // --- 1. Memory Decay (Non-Stationarity) --- - self.alpha += n / 2.; - self.beta += (n * v / (v + n)) * (reward - self.mu).powi(2) / 2.; + // A decay factor of 0.999 implies a "memory horizon" of ~1000 samples. + // This is crucial for VRP where improvements are rare (sparse signals). + // It provides enough patience to wait for "lottery ticket" wins while still + // allowing the agent to abandon operators that stop working in later phases. + const DECAY_FACTOR: Float = 0.999; - // estimate the variance: calculate running mean from the gamma hyper-parameters - self.v = self.beta / (self.alpha + 1.); + // Decay the sufficient statistics. + // CRITICAL: We clamp alpha to >= 2.0. The variance of the Inverse-Gamma + // distribution is defined as Beta / (Alpha - 1). If Alpha <= 1, variance is undefined. + // Keeping Alpha >= 2.0 ensures numerical stability and prevents division by zero. + self.alpha = (self.alpha * DECAY_FACTOR).max(2.0); + self.beta *= DECAY_FACTOR; + + // Increment usage counter (purely for human telemetry/diagnostics). + // We do not decay this value so we can track total lifetime usage. self.n += 1; - self.mu += (reward - self.mu) / self.n as Float; + + // --- 2. Bayesian Update (Normal-Gamma) --- + + // Standard update adds 0.5 to Alpha for each new observation n=1. + self.alpha += 0.5; + let old_mu = self.mu; + + // Calculate Effective N derived from shape parameter. + // In Normal-Gamma, Alpha grows by 0.5 per sample, so N ~ 2 * Alpha. + // This avoids maintaining a separate floating-point 'n' variable for the math. + let effective_n = self.alpha * 2.0; + + // Update Mean (Mu) + // Uses linear interpolation based on the effective sample size. + let learning_rate = 1.0 / effective_n; + self.mu += learning_rate * (reward - self.mu); + + // Update Variance (Beta) + // This is the Bayesian adaptation of Welford's online variance algorithm. + // It incrementally updates the sum of squared errors. + // The term `effective_n / (effective_n + 1.0)` weights the new sample's + // contribution to the variance relative to prior knowledge. + self.beta += 0.5 * (reward - old_mu).powi(2) * effective_n / (effective_n + 1.0); + + // --- 3. Variance Estimation --- + + // Calculate expected variance E[σ²] = Beta / (Alpha - 1). + // Since we enforced Alpha >= 2.0 (decayed) + 0.5 (update) = 2.5, + // the denominator is guaranteed to be >= 1.5. Safe division. + self.v = self.beta / (self.alpha - 1.0); } - /// Gets learned params (alpha, beta, mean and variants) and usage amount. + /// Gets learned params (alpha, beta, mean, variance) and usage amount. 
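
    // Worked trace of the decayed update above, assuming a hypothetical stream of
    // normalized rewards equal to 1.5 and the universal priors (alpha=2.0, beta=1.0, mu=0.0):
    //   step 1: decay -> alpha=2.0 (clamped), beta=0.999; update -> alpha=2.5,
    //           effective_n=5.0, mu = 0.0 + (1.5 - 0.0)/5.0 = 0.30,
    //           beta ≈ 0.999 + 0.5 * 1.5^2 * 5/6 ≈ 1.937, hence v ≈ 1.29
    //   step 2: decay -> alpha≈2.4975, beta≈1.935; update -> alpha≈2.9975,
    //           effective_n≈5.995, mu ≈ 0.30 + (1.5 - 0.30)/5.995 ≈ 0.50
    // Repeated wins keep pulling mu toward 1.5, while alpha saturates at the fixed
    // point of a = a*0.999 + 0.5, i.e. 500, matching the ~1000-sample memory horizon.
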
pub fn get_params(&self) -> (Float, Float, Float, Float, usize) { (self.alpha, self.beta, self.mu, self.v, self.n) } diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 42fc3ac8c..2d28648d9 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -51,7 +51,7 @@ where self.agent.update(generation, &feedback); - vec![feedback.solution] + feedback.solution.into_iter().collect() } fn search_many(&mut self, heuristic_ctx: &Self::Context, solutions: Vec<&Self::Solution>) -> Vec { @@ -69,7 +69,7 @@ where self.agent.save_params(generation); - feedbacks.into_iter().map(|feedback| feedback.solution).collect() + feedbacks.into_iter().filter_map(|feedback| feedback.solution).collect() } fn diversify(&self, heuristic_ctx: &Self::Context, solution: &Self::Solution) -> Vec { @@ -102,11 +102,15 @@ where } } -type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; +/// Type alias for slot machines used in Thompson sampling +pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; -#[derive(PartialEq, Eq, Hash, Clone)] -enum SearchState { +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +/// Search state for Thompson sampling +pub enum SearchState { + /// Best known solution state BestKnown, + /// Diverse solution state Diverse, } @@ -119,10 +123,11 @@ impl Display for SearchState { } } -struct SearchFeedback { +/// Search feedback result for Thompson sampling +pub struct SearchFeedback { sample: SearchSample, slot_idx: usize, - solution: S, + solution: Option, } impl SlotFeedback for SearchFeedback { @@ -131,7 +136,8 @@ impl SlotFeedback for SearchFeedback { } } -struct SearchAction<'a, C, O, S> { +/// Search action wrapper for Thompson sampling +pub struct SearchAction<'a, C, O, S> { operator: Arc + Send + Sync + 'a>, operator_name: String, } @@ -167,11 +173,12 @@ where let sample = SearchSample { name: self.operator_name.clone(), duration, reward, transition }; - SearchFeedback { sample, slot_idx: context.slot_idx, solution: new_solution } + SearchFeedback { sample, slot_idx: context.slot_idx, solution: Some(new_solution) } } } -struct SearchContext<'a, C, O, S> +/// Search context for Thompson sampling +pub struct SearchContext<'a, C, O, S> where C: HeuristicContext, O: HeuristicObjective, @@ -185,7 +192,10 @@ where } struct SearchAgent<'a, C, O, S> { + // Separate learning contexts for different search phases slot_machines: HashMap>, + // Shared scale invariance (universal physics) + signal_stats: SignalStats, tracker: HeuristicTracker, random: Arc, } @@ -197,56 +207,52 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Normalize weights to achievable reward scale to prevent optimistic bias locking - // Find max weight for scaling + // Calculate scaling factor to normalize operator weights to target range let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); - - // Target max prior: rewards typically [0.75, 4.5], max theoretical ~9-10 - // 4.0 is optimistic but achievable for good operators const TARGET_MAX_PRIOR: Float = 4.0; let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; - let slot_machines = search_operators - .into_iter() - .map(|(operator, name, initial_weight)| { - // Scale weight to reward range while preserving hierarchy - // E.g., weight 21.0 -> 4.0, weight 2.0 -> 0.38 - 
let prior_mean = initial_weight * scale; - ( - SlotMachine::new( - prior_mean, - SearchAction { operator, operator_name: name.to_string() }, - DefaultDistributionSampler::new(environment.random.clone()), - ), - name, - ) - }) - .collect::>(); + // Factory function to create identical slot configurations for each state + let create_slots = || { + search_operators + .iter() + .map(|(operator, name, initial_weight)| { + let prior_mean = initial_weight * scale; + ( + SlotMachine::new( + prior_mean, + SearchAction { operator: operator.clone(), operator_name: name.to_string() }, + DefaultDistributionSampler::new(environment.random.clone()), + ), + name.clone(), + ) + }) + .collect::>() + }; - let slot_machines = once((SearchState::BestKnown, slot_machines.clone())) - .chain(once((SearchState::Diverse, slot_machines))) + // Initialize separate states with identical priors but independent learning + let slot_machines = once((SearchState::BestKnown, create_slots())) + .chain(once((SearchState::Diverse, create_slots()))) .collect(); Self { slot_machines, - tracker: HeuristicTracker { - total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), - search_telemetry: Default::default(), - heuristic_telemetry: Default::default(), - is_experimental: environment.is_experimental, - }, + signal_stats: SignalStats::new(), + tracker: HeuristicTracker::new(environment.is_experimental), random: environment.random.clone(), } } /// Picks relevant search operator based on learnings and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { + // Determine search context - critical for operator selection let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { SearchState::BestKnown } else { SearchState::Diverse }; + // Get contextually appropriate slot machines let (slot_idx, slot_machine) = self .slot_machines .get(&from) @@ -258,18 +264,45 @@ where let approx_median = self.tracker.approx_median(); + // Execute with full context information slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } - /// Updates estimations based on search feedback. + /// Updates estimations based on search feedback with protected signal baseline. pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { - let from = &feedback.sample.transition.0; - - if let Some(slots) = self.slot_machines.get_mut(from) { - slots[feedback.slot_idx].0.update(feedback); + let raw_reward = feedback.sample.reward; + let current_scale = self.signal_stats.scale(); + + // 1. Update Shared Signal Baseline (Protected) + // Clamp observation to 3x current scale to prevent a single + // "Best Known Jackpot" from disrupting baseline for Diverse state + if raw_reward > f64::EPSILON { + self.signal_stats.update(raw_reward.min(current_scale * 3.0)); } - self.tracker.observe_sample(generation, feedback.sample.clone()) + // 2. Normalize for Contextual Learning (Uncapped) + // If we hit a jackpot (e.g., reward 60.0 vs scale 1.0), we pass the full 60.0. 
+ // This ensures the specific slot machine learns "I AM THE BEST" + let normalized_reward = if raw_reward > f64::EPSILON { + raw_reward / self.signal_stats.scale() + } else { + // Zero reward strategy: don't penalize, just indicate no progress + // Allows variance in "good" operators to keep them alive vs "bad" operators + 0.0 + }; + let normalized_feedback = SearchFeedback { + sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, + slot_idx: feedback.slot_idx, + solution: None, + }; + + // 3. Update Contextual Slot Machine + let from = &feedback.sample.transition.0; + let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); + slots[feedback.slot_idx].0.update(&normalized_feedback); + + // 4. Observe telemetry + self.tracker.observe_sample(generation, feedback.sample.clone()); } /// Updates statistics about heuristic internal parameters. @@ -279,18 +312,36 @@ where } self.slot_machines.iter().for_each(|(state, slots)| { - slots.iter().map(|(slot, name)| (name.clone(), slot.get_params())).for_each( - |(name, (alpha, beta, mu, v, n))| { - self.tracker.observe_params( - generation, - HeuristicSample { state: state.clone(), name, alpha, beta, mu, v, n }, - ); - }, - ) + slots.iter().for_each(|(slot, name)| { + let (alpha, beta, mu, v, n) = slot.get_params(); + self.tracker.observe_params( + generation, + HeuristicSample { state: state.clone(), name: name.clone(), alpha, beta, mu, v, n }, + ); + }); }); } } +/// Sample of search telemetry. +#[derive(Clone)] +struct SearchSample { + name: String, + duration: usize, + reward: Float, + transition: (SearchState, SearchState), +} + +struct HeuristicSample { + state: SearchState, + name: String, + alpha: Float, + beta: Float, + mu: Float, + v: Float, + n: usize, +} + impl Display for DynamicSelective where C: HeuristicContext, @@ -448,26 +499,46 @@ where value * sign * priority_amplifier } -/// Sample of search telemetry. +/// Signal tracker that observes ONLY positive values to establish baseline for "Success". +/// Uses exponential moving average for stability in sparse signals. #[derive(Clone)] -struct SearchSample { - name: String, - duration: usize, - reward: Float, - transition: (SearchState, SearchState), +struct SignalStats { + mean: Float, + n: Float, } -struct HeuristicSample { - state: SearchState, - name: String, - alpha: Float, - beta: Float, - mu: Float, - v: Float, - n: usize, +impl SignalStats { + fn new() -> Self { + Self { mean: 0.0, n: 0.0 } + } + + /// Observe ONLY positive values to establish a baseline for "Success" + fn update(&mut self, value: Float) { + if value <= f64::EPSILON { + return; + } + + // Horizon: Adapt to the scale of the last ~200 SUCCESSFUL operations + // This is structural (adaptation speed), not problem-specific. + let window_size = 200.0; + let decay = 1.0 - (1.0 / window_size); + + self.n = self.n * decay + 1.0; + + // Exponential Moving Average of the Magnitude + // We use this instead of Welford's variance for stability in sparse signals + let learning_rate = 1.0 / self.n; + self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate; + } + + /// Returns the scale factor. + /// If we haven't seen enough data, return 1.0 to avoid division by zero. + fn scale(&self) -> Float { + if self.mean < f64::EPSILON { 1.0 } else { self.mean } + } } -/// Provides way to track heuristic's telemetry and duration median estimation. 
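
// To see the "ruler" behaviour of `SignalStats` above in isolation, a minimal
// standalone sketch (the reward magnitudes and the mid-stream shift are made up):

fn main() {
    let (mut mean, mut n) = (0.0_f64, 0.0_f64);
    let decay = 1.0 - 1.0 / 200.0; // same ~200-sample horizon as above

    for i in 0..1_000 {
        let value = if i < 500 { 2.0 } else { 8.0 }; // success magnitude shifts mid-run
        n = n * decay + 1.0; // effective sample count, saturating near 200
        let lr = 1.0 / n;
        mean = mean * (1.0 - lr) + value * lr;
    }

    // 500 samples after the shift: mean ≈ 8.0 - 6.0 * exp(-2.5) ≈ 7.5,
    // so the scale tracks the new magnitude instead of the stale 2.0.
    println!("scale ≈ {mean:.2}");
}
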
+/// Enhanced diagnostic tracker for Thompson sampling analysis struct HeuristicTracker { total_median: RemedianUsize, search_telemetry: Vec<(usize, SearchSample)>, @@ -476,6 +547,16 @@ struct HeuristicTracker { } impl HeuristicTracker { + /// Creates new tracker with diagnostic configuration + pub fn new(is_experimental: bool) -> Self { + Self { + total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), + search_telemetry: Default::default(), + heuristic_telemetry: Default::default(), + is_experimental, + } + } + /// Returns true if telemetry is enabled. pub fn telemetry_enabled(&self) -> bool { self.is_experimental diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 7d4958e89..814091be0 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -87,17 +87,21 @@ fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: u fn can_display_heuristic_info() { let is_experimental = true; let environment = Environment { is_experimental, ..Environment::default() }; - let duration = 1; - let reward = 1.; - let transition = (SearchState::Diverse, SearchState::BestKnown); - let mut heuristic = + let heuristic = DynamicSelective::::new(vec![], vec![], &environment); - heuristic.agent.tracker.observe_sample(1, SearchSample { name: "name1".to_string(), duration, reward, transition }); + // Test that diagnostic system is properly initialized + assert_eq!(heuristic.agent.tracker.telemetry_enabled(), is_experimental); let formatted = format!("{heuristic}"); - assert!(!formatted.is_empty()); + // Should contain TELEMETRY section when experimental mode is enabled + if is_experimental { + assert!(formatted.contains("TELEMETRY")); + } else { + // When not experimental, should be empty or minimal + assert!(formatted.is_empty() || !formatted.contains("thompson_diagnostics:")); + } } #[test] From 604c7e1ce5bd4ea8b7f240b242d6d1cda2343d10 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 13:15:40 +0100 Subject: [PATCH 12/30] Change reward multiplier calculations --- rosomaxa/src/hyper/dynamic_selective.rs | 70 ++++++++++++++++++------- 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 2d28648d9..92da75a38 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -423,36 +423,66 @@ where .unwrap_or(0.) } -/// Estimates performance of used operation based on its duration and overall improvement statistics. -/// Returns a reward multiplier in `(~0.5, 3]` range. +/// Estimates performance of the used operation based on its duration and the current search phase. +/// Returns a reward multiplier in the range `[0.8, 1.2]`. +/// +/// # Strategy: Context-Aware Time Dilation +/// +/// This function balances **Throughput (Speed)** vs. **Depth (Quality)** by adapting to the +/// current `improvement_1000_ratio`: +/// +/// 1. **Flow State (High Improvement):** When the solver is finding frequent improvements, +/// we prioritize **Efficiency**. Fast operators are rewarded, and slow operators are penalized. +/// This maximizes the generation of diverse individuals to populate the pool. +/// 2. **Stagnation (Low Improvement):** When the solver is stuck, we prioritize **Power**. +/// The time penalty is dampened or removed. 
This ensures that "Heavy" operators (e.g., +/// complex Ruin & Recreate), which are naturally slower but capable of escaping local optima, +/// are not unfairly penalized against faster, ineffective local search operators. +/// +/// # Logic +/// +/// * **Continuous Scaling:** Uses a logarithmic curve to avoid artificial "cliffs" in reward. +/// * **Phase Damping:** The time modifier is scaled by the improvement ratio. +/// * **Safety Clamp:** The final multiplier is bounded to `+/- 20%` to ensure that execution +/// time never overrides the actual quality signal (distance improvement). fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, O: HeuristicObjective, S: HeuristicSolution, { - let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio; + let stats = search_ctx.heuristic_ctx.statistics(); + let improvement_ratio = stats.improvement_1000_ratio; + let approx_median = &search_ctx.approx_median; - let median_ratio = - approx_median.map_or(1., |median| if median == 0 { 1. } else { duration as Float / median as Float }); - - let median_ratio = match median_ratio.clamp(0.5, 2.) { - ratio if ratio < 0.75 => 1.5, // Allegro - ratio if ratio < 1. => 1.25, // Allegretto - ratio if ratio > 1.5 => 0.75, // Andante - _ => 1., // Moderato + let median = match approx_median { + Some(m) if *m > 0 => *m as Float, + _ => return 1.0, }; - let improvement_ratio = match improvement_ratio { - // stagnation: increase reward - ratio if ratio < 0.05 => 2., - // fast convergence: decrease reward - ratio if ratio > 0.150 => 0.75, - // moderate convergence - _ => 1., - }; + let time_ratio = duration as Float / median; + + // 1. Calculate the raw time modifier (Logarithmic) + // Fast (0.5x) -> +0.15 reward + // Slow (2.0x) -> -0.15 reward + // We use a gentle curve so we don't distort the signal too much. + let raw_modifier = (1.0 / time_ratio).ln() * 0.15; + + // 2. Apply Phase-Dependent Damping + // If improvement is HIGH (> 0.1), we care about speed. Damping = 1.0. + // If improvement is LOW (< 0.001), we ignore speed. Damping = 0.0. + // This allows slow, heavy operators to "catch up" in ranking when the easy gains are gone. + + // Smooth transition from 0.0 to 1.0 based on improvement ratio + // We saturate at 0.1 (10% improvement is considered "Fast Flow") + let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0); + + // Apply damping + let final_modifier = raw_modifier * phase_damping; - median_ratio * improvement_ratio + // 3. Final Safety Clamp + // Ensure we never boost/penalize by more than 20%, regardless of how extreme the time is. 
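    // A quick numeric check of the curve above (illustrative values; with
    // improvement_ratio = 0.1 the damping is exactly 1.0):
    //   time_ratio 0.5 (2x faster)  -> ln(2.0)*0.15 ≈ +0.104 -> multiplier ≈ 1.104
    //   time_ratio 2.0 (2x slower)  -> ln(0.5)*0.15 ≈ -0.104 -> multiplier ≈ 0.896
    //   time_ratio 0.1 (10x faster) -> raw ≈ +0.345, clamped  -> multiplier = 1.2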
+ (1.0 + final_modifier).clamp(0.8, 1.2) } /// Returns distance in `[-N., N]` where: From 0cc6ed623e101f45c0ae25a4187c383b1e875d2c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 17:05:40 +0100 Subject: [PATCH 13/30] Improve predictability of dynamic heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 10 +- rosomaxa/src/hyper/dynamic_selective.rs | 177 ++++++++++++++++----- 2 files changed, 139 insertions(+), 48 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index a3cac2732..cb314b0fe 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -61,12 +61,12 @@ where let alpha = 2.0; let beta = 1.0; - // Prior mean is clamped to a reasonable Z-score range (-1 to 1) to prevent - // initialization bias, though typically 0.0 is used for centered data. - let mu = prior_mean.min(1.0).max(-1.0); + // Prior mean is clamped to a reasonable Z-score range (-0 to 10) + let mu = prior_mean.max(0.01).min(10.0); - // Variance expectation for Inverse-Gamma: Beta / (Alpha - 1) - // With Alpha=2.0, this results in v=1.0. + // Variance expectation v = Beta / (Alpha - 1) = 1.0. + // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, + // which allows the bandit to explore even if one operator starts ahead. let v = beta / (alpha - 1.0); Self { n: 0, alpha, beta, mu, v, action, sampler } diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 92da75a38..3586d4520 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -105,6 +105,60 @@ where /// Type alias for slot machines used in Thompson sampling pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; +/// Centralized "Physics" for the Reward System. +/// +/// This struct defines the constants that control how rewards are calculated, +/// scaled, and clamped. Changing values here automatically propagates to +/// reward estimation and signal normalization logic. +struct SearchRewards; + +impl SearchRewards { + /// The base value for a standard improvement (Local Search success). + /// Used as the atomic unit for signal normalization. + pub const BASE_REWARD: Float = 2.0; + + /// The offset added to relative distance to ensure positive rewards. + /// Logic: `(distance + OFFSET)`. + pub const DISTANCE_OFFSET: Float = 1.0; + + /// Multiplier applied when a solution improves the Global Best Known. + /// This is the "Jackpot" factor. + pub const GLOBAL_BEST_MULTIPLIER: Float = 2.5; + + /// Multiplier applied when a solution is diverse but not a global improvement. + /// Keeps "Diverse" operators alive but with low signal. + pub const DIVERSE_MULTIPLIER: Float = 0.05; + + /// The maximum percentage (+/-) that execution duration can affect the reward. + /// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. + pub const PERF_TOLERANCE: Float = 0.2; + + /// The "Cheap Failure" + /// The penalty applied when an operator produces no improvement. + /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". + /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. + pub const PENALTY_MIN: Float = -0.01; + + /// The "Expensive Failure" + /// The penalty applied when an operator produces a negative outcome. + /// A larger negative value ensures that failures are strongly discouraged. 
+ pub const PENALTY_MAX: Float = -0.1; + + /// Calculates the ratio between the "Jackpot" and the "Normal" signal. + /// Used by the Agent to determine how many times larger than the average + /// a signal must be to be considered an "Outlier" that needs clamping. + pub fn signal_clamp_ratio(max_dist: Float) -> Float { + // Recalculate based on new constants + let base_term = max_dist + Self::DISTANCE_OFFSET; + let global_term = base_term * Self::GLOBAL_BEST_MULTIPLIER; + let max_theoretical = (base_term + global_term) * (1.0 + Self::PERF_TOLERANCE); + + let typical_reward = (1.0 + Self::DISTANCE_OFFSET) * Self::BASE_REWARD; + + max_theoretical / typical_reward + } +} + #[derive(PartialEq, Eq, Hash, Clone, Debug)] /// Search state for Thompson sampling pub enum SearchState { @@ -161,13 +215,42 @@ where let (new_solution, duration) = Timer::measure_duration(|| self.operator.search(context.heuristic_ctx, context.solution)); - let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let duration = duration.as_millis() as usize; - let base_reward = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - let reward_multiplier = estimate_reward_perf_multiplier(&context, duration); - let reward = base_reward * reward_multiplier; + // 1. Analyze Result + let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); + // 2. Calculate Final Reward / Penalty + let reward = match distance_score { + // SUCCESS: Apply performance multiplier (Bonus/Penalty within +/- 20%) + Some(base_score) => { + let mult = estimate_reward_perf_multiplier(&context, duration); + base_score * mult + } + // FAILURE: Apply Time-Weighted Penalty + None => { + // Get median (defensive default to duration itself to avoid div/0) + let median = context.approx_median.unwrap_or(duration).max(1) as Float; + let ratio = duration as Float / median; + + // Logic: + // Ratio 0.2 (Fast) -> -0.1 * 0.2 = -0.02 (Too small, clamp to MIN) -> -0.1 + // Ratio 1.0 (Avg) -> -0.1 * 1.0 = -0.1 + // Ratio 5.0 (Slow) -> -0.1 * 5.0 = -0.5 + // Ratio 20.0 (Very Slow) -> -2.0 (Clamp to MAX) -> -1.0 + + // Base penalty unit: -0.1 per "Median Unit of Time" + let raw_penalty = SearchRewards::PENALTY_MIN * ratio; + + // Clamp to the range [-1.0, -0.1] + // Note: min/max semantics with negative numbers: + // max(-1.0) ensures we don't go below -1. + // min(-0.1) ensures we don't go above -0.1. + raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) + } + }; + + let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; let transition = (context.from, to); @@ -207,10 +290,17 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Calculate scaling factor to normalize operator weights to target range - let max_weight = search_operators.iter().map(|(_, _, weight)| *weight).fold(0.0_f64, |a, b| a.max(b)); - const TARGET_MAX_PRIOR: Float = 4.0; - let scale = if max_weight > 0.0 { TARGET_MAX_PRIOR / max_weight } else { 1.0 }; + // Calculate the Mean Weight + // We want the "Average Operator" to have a prior mu = 1.0. + // This aligns with SignalStats which normalizes the average reward to 1.0. 
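        // Illustrative numbers (hypothetical weights): for operators weighted
        // [7, 5, 4, 2] the average is 4.5, so scale = 2.0 / 4.5 ≈ 0.44 and the
        // priors become ≈ [3.1, 2.2, 1.8, 0.9]: the configured hierarchy is
        // preserved while the average prior lands on BASE_REWARD.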
+ let total_weight: Float = search_operators.iter().map(|(_, _, w)| *w).sum(); + let count = search_operators.len() as Float; + + let avg_weight = if count > 0.0 { total_weight / count } else { 1.0 }; + + // Avoid division by zero if weights are weird + let target_mean = SearchRewards::BASE_REWARD; + let scale = if avg_weight > f64::EPSILON { target_mean / avg_weight } else { target_mean }; // Factory function to create identical slot configurations for each state let create_slots = || { @@ -270,26 +360,21 @@ where /// Updates estimations based on search feedback with protected signal baseline. pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { + let max_dist = feedback.solution.as_ref().map_or(1., |s| s.fitness().count() as Float); let raw_reward = feedback.sample.reward; let current_scale = self.signal_stats.scale(); - // 1. Update Shared Signal Baseline (Protected) - // Clamp observation to 3x current scale to prevent a single - // "Best Known Jackpot" from disrupting baseline for Diverse state + // 1. Update Ruler (PHYSICS) + // We only update the "Scale of Success" based on positive outcomes. + // We do NOT shrink the ruler just because we failed. if raw_reward > f64::EPSILON { - self.signal_stats.update(raw_reward.min(current_scale * 3.0)); + let clamp_limit = current_scale * SearchRewards::signal_clamp_ratio(max_dist); + self.signal_stats.update(raw_reward.min(clamp_limit)); } - // 2. Normalize for Contextual Learning (Uncapped) - // If we hit a jackpot (e.g., reward 60.0 vs scale 1.0), we pass the full 60.0. - // This ensures the specific slot machine learns "I AM THE BEST" - let normalized_reward = if raw_reward > f64::EPSILON { - raw_reward / self.signal_stats.scale() - } else { - // Zero reward strategy: don't penalize, just indicate no progress - // Allows variance in "good" operators to keep them alive vs "bad" operators - 0.0 - }; + // 2. Normalize (Adaptive) + let normalized_reward = + if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; let normalized_feedback = SearchFeedback { sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, slot_idx: feedback.slot_idx, @@ -390,10 +475,8 @@ where } /// Estimates new solution discovery reward based on distance metric. -/// Returns a reward estimation in `[0, 6]` range. This range consists of: -/// - a initial distance improvement gives `[0, 2]` -/// - a best known improvement gives `[0, 2]` * BEST_DISCOVERY_REWARD_MULTIPLIER -fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Float +/// Returns `Some(reward)` for improvement, or `None` for stagnation. 
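
// For a single-objective problem (max_dist = 1) the clamp ratio used in `update`
// above works out to ((1+1) + (1+1)*2.5) * 1.2 / ((1+1)*2) = 8.4 / 4 = 2.1, so a
// single observation can stretch the baseline by at most ~2.1x its current scale:
// a jackpot from the reward function below cannot wreck the ruler in one step.
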
+fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option where C: HeuristicContext, O: HeuristicObjective, @@ -403,24 +486,30 @@ where .ranked() .next() .map(|best_known| { - const BEST_DISCOVERY_REWARD_MULTIPLIER: Float = 2.; - const DIVERSE_DISCOVERY_REWARD_MULTIPLIER: Float = 0.05; - let objective = heuristic_ctx.objective(); + // Calculate normalized relative distances [0, 1] let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // NOTE remap distances to range [0, 2] + // Reward Components (Max ~4.0 for Local, ~10.0 for Global) + let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; + let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) + * SearchRewards::BASE_REWARD + * SearchRewards::GLOBAL_BEST_MULTIPLIER; + match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { - (Ordering::Greater, Ordering::Greater) => { - (distance_initial + 1.) + (distance_best + 1.) * BEST_DISCOVERY_REWARD_MULTIPLIER - } - (Ordering::Greater, _) => (distance_initial + 1.) * DIVERSE_DISCOVERY_REWARD_MULTIPLIER, - _ => 0., + // Global Jackpot + (Ordering::Greater, Ordering::Greater) => Some(reward_initial + reward_best), + + // Local/Diverse Improvement + (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), + + // Stagnation + _ => None, } }) - .unwrap_or(0.) + .unwrap_or(None) } /// Estimates performance of the used operation based on its duration and the current search phase. @@ -480,14 +569,14 @@ where // Apply damping let final_modifier = raw_modifier * phase_damping; - // 3. Final Safety Clamp - // Ensure we never boost/penalize by more than 20%, regardless of how extreme the time is. - (1.0 + final_modifier).clamp(0.8, 1.2) + // 3. Final Safety Clamp defined by Reward Physics + let tolerance = SearchRewards::PERF_TOLERANCE; + (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance) } -/// Returns distance in `[-N., N]` where: -/// - N: in range `[1, total amount of objectives]` -/// - sign specifies whether a solution is better (positive) or worse (negative). +/// Returns normalized distance in `[0.0, 1.0]` (absolute magnitude) +/// where 1.0 = Improvement on Primary Objective. +/// Returns negative value if worse. 
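
// Plugging the constants into the composition above (illustrative distances):
// a solution that is 0.3 better than its parent and 0.1 better than the best
// known earns (0.3+1)*2 + (0.1+1)*2*2.5 = 2.6 + 5.5 = 8.1, while the same local
// gain without a global improvement earns only 2.6 * 0.05 = 0.13. Jackpots
// dominate; diverse progress barely registers but stays positive.
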
fn get_relative_distance(objective: &O, a: &S, b: &S) -> Float where O: HeuristicObjective, @@ -517,7 +606,9 @@ where let total_objectives = a.fitness().count(); assert_ne!(total_objectives, 0, "cannot have an empty objective here"); assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives"); - let priority_amplifier = (total_objectives - idx) as Float; + + // Normalization: Divide by total_objectives to map to [0, 1] + let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float; let value = a .fitness() From 647d4e6343c097073ce00c9258d5cdd7d11f27ed Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 18:23:55 +0100 Subject: [PATCH 14/30] Improve comments --- rosomaxa/src/hyper/dynamic_selective.rs | 234 +++++++++++------------- 1 file changed, 104 insertions(+), 130 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 3586d4520..3fec3874f 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -24,7 +24,7 @@ pub type HeuristicDiversifyOperators = /// An experimental dynamic selective hyper heuristic which selects inner heuristics /// based on how they work during the search. The selection process is modeled using reinforcement -/// learning technics. +/// learning techniques. pub struct DynamicSelective where C: HeuristicContext, @@ -102,7 +102,7 @@ where } } -/// Type alias for slot machines used in Thompson sampling +/// Type alias for slot machines used in Thompson sampling. pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; /// Centralized "Physics" for the Reward System. @@ -159,12 +159,12 @@ impl SearchRewards { } } +/// Search state for Thompson sampling. #[derive(PartialEq, Eq, Hash, Clone, Debug)] -/// Search state for Thompson sampling pub enum SearchState { - /// Best known solution state + /// Best known solution state. BestKnown, - /// Diverse solution state + /// Diverse solution state. Diverse, } @@ -177,7 +177,7 @@ impl Display for SearchState { } } -/// Search feedback result for Thompson sampling +/// Search feedback result for Thompson sampling. pub struct SearchFeedback { sample: SearchSample, slot_idx: usize, @@ -190,7 +190,7 @@ impl SlotFeedback for SearchFeedback { } } -/// Search action wrapper for Thompson sampling +/// Search action wrapper for Thompson sampling. pub struct SearchAction<'a, C, O, S> { operator: Arc + Send + Sync + 'a>, operator_name: String, @@ -217,35 +217,36 @@ where let duration = duration.as_millis() as usize; - // 1. Analyze Result + // 1. Analyze result. let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - // 2. Calculate Final Reward / Penalty + // 2. Calculate final reward / penalty. let reward = match distance_score { - // SUCCESS: Apply performance multiplier (Bonus/Penalty within +/- 20%) + // Success: Apply performance multiplier (bonus/penalty within +/- 20%). Some(base_score) => { let mult = estimate_reward_perf_multiplier(&context, duration); base_score * mult } - // FAILURE: Apply Time-Weighted Penalty + // Failure: Apply time-weighted penalty. None => { - // Get median (defensive default to duration itself to avoid div/0) + // Get median (defensive default to duration itself to avoid div/0). 
let median = context.approx_median.unwrap_or(duration).max(1) as Float; let ratio = duration as Float / median; // Logic: - // Ratio 0.2 (Fast) -> -0.1 * 0.2 = -0.02 (Too small, clamp to MIN) -> -0.1 - // Ratio 1.0 (Avg) -> -0.1 * 1.0 = -0.1 - // Ratio 5.0 (Slow) -> -0.1 * 5.0 = -0.5 - // Ratio 20.0 (Very Slow) -> -2.0 (Clamp to MAX) -> -1.0 + // Ratio 0.2 (Fast) -> -0.01 * 0.2 = -0.002 (Too small, clamp to MIN) -> -0.01 + // Ratio 1.0 (Avg) -> -0.01 * 1.0 = -0.01 + // Ratio 5.0 (Slow) -> -0.01 * 5.0 = -0.05 + // Ratio 10.0 (Slower) -> -0.01 * 10.0 = -0.1 (At MAX boundary) + // Ratio 20.0 (Very Slow) -> -0.01 * 20.0 = -0.2 (Clamp to MAX) -> -0.1 - // Base penalty unit: -0.1 per "Median Unit of Time" + // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". let raw_penalty = SearchRewards::PENALTY_MIN * ratio; - // Clamp to the range [-1.0, -0.1] + // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. // Note: min/max semantics with negative numbers: - // max(-1.0) ensures we don't go below -1. - // min(-0.1) ensures we don't go above -0.1. + // - max(PENALTY_MAX) ensures we don't go below -0.1 (more negative). + // - min(PENALTY_MIN) ensures we don't go above -0.01 (less negative). raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) } }; @@ -260,7 +261,7 @@ where } } -/// Search context for Thompson sampling +/// Search context for Thompson sampling. pub struct SearchContext<'a, C, O, S> where C: HeuristicContext, @@ -333,16 +334,16 @@ where } } - /// Picks relevant search operator based on learnings and runs the search. + /// Picks the relevant search operator based on learnings and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { - // Determine search context - critical for operator selection + // Determine search context - critical for operator selection. let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { SearchState::BestKnown } else { SearchState::Diverse }; - // Get contextually appropriate slot machines + // Get contextually appropriate slot machines. let (slot_idx, slot_machine) = self .slot_machines .get(&from) @@ -354,7 +355,7 @@ where let approx_median = self.tracker.approx_median(); - // Execute with full context information + // Execute with full context information. slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } @@ -364,7 +365,7 @@ where let raw_reward = feedback.sample.reward; let current_scale = self.signal_stats.scale(); - // 1. Update Ruler (PHYSICS) + // 1. Update ruler. // We only update the "Scale of Success" based on positive outcomes. // We do NOT shrink the ruler just because we failed. if raw_reward > f64::EPSILON { @@ -372,7 +373,7 @@ where self.signal_stats.update(raw_reward.min(clamp_limit)); } - // 2. Normalize (Adaptive) + // 2. Normalize. let normalized_reward = if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; let normalized_feedback = SearchFeedback { @@ -381,12 +382,12 @@ where solution: None, }; - // 3. Update Contextual Slot Machine + // 3. Update contextual slot machine. let from = &feedback.sample.transition.0; let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); slots[feedback.slot_idx].0.update(&normalized_feedback); - // 4. Observe telemetry + // 4. Observe telemetry. self.tracker.observe_sample(generation, feedback.sample.clone()); } @@ -408,59 +409,6 @@ where } } -/// Sample of search telemetry. 
-#[derive(Clone)] -struct SearchSample { - name: String, - duration: usize, - reward: Float, - transition: (SearchState, SearchState), -} - -struct HeuristicSample { - state: SearchState, - name: String, - alpha: Float, - beta: Float, - mu: Float, - v: Float, - n: usize, -} - -impl Display for DynamicSelective -where - C: HeuristicContext, - O: HeuristicObjective, - S: HeuristicSolution, -{ - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - if !self.agent.tracker.telemetry_enabled() { - return Ok(()); - } - - f.write_fmt(format_args!("TELEMETRY\n"))?; - f.write_fmt(format_args!("search:\n"))?; - f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; - for (generation, sample) in self.agent.tracker.search_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{}\n", - sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration - ))?; - } - - f.write_fmt(format_args!("heuristic:\n"))?; - f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; - for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{},{},{}\n", - generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n - ))?; - } - - Ok(()) - } -} - fn compare_to_best(heuristic_ctx: &C, solution: &S) -> Ordering where C: HeuristicContext, @@ -488,11 +436,11 @@ where .map(|best_known| { let objective = heuristic_ctx.objective(); - // Calculate normalized relative distances [0, 1] + // Calculate normalized relative distances [0, 1]. let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // Reward Components (Max ~4.0 for Local, ~10.0 for Global) + // Reward components (max ~4.0 for local, ~10.0 for global). let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD @@ -514,26 +462,6 @@ where /// Estimates performance of the used operation based on its duration and the current search phase. /// Returns a reward multiplier in the range `[0.8, 1.2]`. -/// -/// # Strategy: Context-Aware Time Dilation -/// -/// This function balances **Throughput (Speed)** vs. **Depth (Quality)** by adapting to the -/// current `improvement_1000_ratio`: -/// -/// 1. **Flow State (High Improvement):** When the solver is finding frequent improvements, -/// we prioritize **Efficiency**. Fast operators are rewarded, and slow operators are penalized. -/// This maximizes the generation of diverse individuals to populate the pool. -/// 2. **Stagnation (Low Improvement):** When the solver is stuck, we prioritize **Power**. -/// The time penalty is dampened or removed. This ensures that "Heavy" operators (e.g., -/// complex Ruin & Recreate), which are naturally slower but capable of escaping local optima, -/// are not unfairly penalized against faster, ineffective local search operators. -/// -/// # Logic -/// -/// * **Continuous Scaling:** Uses a logarithmic curve to avoid artificial "cliffs" in reward. -/// * **Phase Damping:** The time modifier is scaled by the improvement ratio. -/// * **Safety Clamp:** The final multiplier is bounded to `+/- 20%` to ensure that execution -/// time never overrides the actual quality signal (distance improvement). 
fn estimate_reward_perf_multiplier(search_ctx: &SearchContext, duration: usize) -> Float where C: HeuristicContext, @@ -551,32 +479,23 @@ where let time_ratio = duration as Float / median; - // 1. Calculate the raw time modifier (Logarithmic) - // Fast (0.5x) -> +0.15 reward - // Slow (2.0x) -> -0.15 reward - // We use a gentle curve so we don't distort the signal too much. + // Calculate the raw time modifier (logarithmic). let raw_modifier = (1.0 / time_ratio).ln() * 0.15; - // 2. Apply Phase-Dependent Damping - // If improvement is HIGH (> 0.1), we care about speed. Damping = 1.0. - // If improvement is LOW (< 0.001), we ignore speed. Damping = 0.0. - // This allows slow, heavy operators to "catch up" in ranking when the easy gains are gone. - - // Smooth transition from 0.0 to 1.0 based on improvement ratio - // We saturate at 0.1 (10% improvement is considered "Fast Flow") + // Apply phase-dependent damping. + // Smooth transition from 0.0 to 1.0 based on improvement ratio. + // We saturate at 0.1 (10% improvement is considered "Fast Flow"). let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0); - - // Apply damping let final_modifier = raw_modifier * phase_damping; - // 3. Final Safety Clamp defined by Reward Physics + // Final safety clamp defined by reward physics. let tolerance = SearchRewards::PERF_TOLERANCE; (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance) } -/// Returns normalized distance in `[0.0, 1.0]` (absolute magnitude) +/// Returns the normalized distance in `[0.0, 1.0]` (absolute magnitude) /// where 1.0 = Improvement on Primary Objective. -/// Returns negative value if worse. +/// Returns a negative value if worse. fn get_relative_distance(objective: &O, a: &S, b: &S) -> Float where O: HeuristicObjective, @@ -607,7 +526,7 @@ where assert_ne!(total_objectives, 0, "cannot have an empty objective here"); assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives"); - // Normalization: Divide by total_objectives to map to [0, 1] + // Normalization: Divide by total_objectives to map to [0, 1]. let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float; let value = a @@ -620,7 +539,7 @@ where value * sign * priority_amplifier } -/// Signal tracker that observes ONLY positive values to establish baseline for "Success". +/// Signal tracker that observes ONLY positive values to establish a baseline for "Success". /// Uses exponential moving average for stability in sparse signals. #[derive(Clone)] struct SignalStats { @@ -633,21 +552,21 @@ impl SignalStats { Self { mean: 0.0, n: 0.0 } } - /// Observe ONLY positive values to establish a baseline for "Success" + /// Observes ONLY positive values to establish a baseline for "Success". fn update(&mut self, value: Float) { if value <= f64::EPSILON { return; } - // Horizon: Adapt to the scale of the last ~200 SUCCESSFUL operations + // Horizon: Adapt to the scale of the last ~200 successful operations. // This is structural (adaptation speed), not problem-specific. let window_size = 200.0; let decay = 1.0 - (1.0 / window_size); self.n = self.n * decay + 1.0; - // Exponential Moving Average of the Magnitude - // We use this instead of Welford's variance for stability in sparse signals + // Exponential moving average of the magnitude. + // We use this instead of Welford's variance for stability in sparse signals. 
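        // (Likely motivation: with sparse, heavily skewed rewards a running
        // variance estimate would be dominated by a handful of spikes, whereas
        // tracking only the mean magnitude keeps the ruler smooth and cheap.)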
let learning_rate = 1.0 / self.n; self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate; } @@ -659,7 +578,7 @@ impl SignalStats { } } -/// Enhanced diagnostic tracker for Thompson sampling analysis +/// Enhanced diagnostic tracker for Thompson sampling analysis. struct HeuristicTracker { total_median: RemedianUsize, search_telemetry: Vec<(usize, SearchSample)>, @@ -668,7 +587,7 @@ struct HeuristicTracker { } impl HeuristicTracker { - /// Creates new tracker with diagnostic configuration + /// Creates a new tracker with diagnostic configuration. pub fn new(is_experimental: bool) -> Self { Self { total_median: RemedianUsize::new(11, 7, |a, b| a.cmp(b)), @@ -683,12 +602,12 @@ impl HeuristicTracker { self.is_experimental } - /// Returns median approximation. + /// Returns the median approximation. pub fn approx_median(&self) -> Option { self.total_median.approx_median() } - /// Observes a current sample. Updates total duration median. + /// Observes the current sample and updates the total duration median. pub fn observe_sample(&mut self, generation: usize, sample: SearchSample) { self.total_median.add_observation(sample.duration); if self.telemetry_enabled() { @@ -696,9 +615,64 @@ impl HeuristicTracker { } } + /// Observes heuristic parameters for telemetry tracking. pub fn observe_params(&mut self, generation: usize, sample: HeuristicSample) { if self.telemetry_enabled() { self.heuristic_telemetry.push((generation, sample)); } } } + +/// A sample of search telemetry. +#[derive(Clone)] +struct SearchSample { + name: String, + duration: usize, + reward: Float, + transition: (SearchState, SearchState), +} + +/// A sample of heuristic parameters telemetry. +struct HeuristicSample { + state: SearchState, + name: String, + alpha: Float, + beta: Float, + mu: Float, + v: Float, + n: usize, +} + +impl Display for DynamicSelective +where + C: HeuristicContext, + O: HeuristicObjective, + S: HeuristicSolution, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + if !self.agent.tracker.telemetry_enabled() { + return Ok(()); + } + + f.write_fmt(format_args!("TELEMETRY\n"))?; + f.write_fmt(format_args!("search:\n"))?; + f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; + for (generation, sample) in self.agent.tracker.search_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration + ))?; + } + + f.write_fmt(format_args!("heuristic:\n"))?; + f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; + for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + + Ok(()) + } +} From 5ac3ce3913a87c76a4bab084b02773cd7da66a81 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:25:32 +0100 Subject: [PATCH 15/30] Improve heuristic operators list --- vrp-core/src/solver/heuristic.rs | 135 +++++++++++++------------------ 1 file changed, 56 insertions(+), 79 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index 0a0762b81..cc8c544f5 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -408,7 +408,6 @@ mod statik { (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 5), (Arc::new(RecreateWithFarthest::new(random.clone())), 2), 
(Arc::new(RecreateWithSkipBest::new(4, 8, random.clone())), 2), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), 1), (Arc::new(RecreateWithSlice::new(random.clone())), 1), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased( @@ -492,7 +491,7 @@ mod statik { mod dynamic { use super::*; - fn get_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { + fn get_weighted_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ (cheapest.clone(), "cheapest".to_string(), 1.), @@ -502,7 +501,6 @@ mod dynamic { (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), "gaps".to_string(), 1.), (Arc::new(RecreateWithFarthest::new(random.clone())), "farthest".to_string(), 1.), - (Arc::new(RecreateWithNearestNeighbor::new(random.clone())), "nearest".to_string(), 1.), ( Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest.clone(), random.clone())), "skip_random".to_string(), @@ -522,27 +520,35 @@ mod dynamic { .collect() } - fn get_ruins_with_limits(limits: RemovalLimits, prefix: &str) -> Vec<(Arc, String, Float)> { + /// Returns weighted ruin strategies that combine normal and small limits with 1:2 ratio. + /// This reduces operator count while preserving exploration of both limit ranges. + fn get_weighted_ruins( + problem: Arc, + normal_limits: RemovalLimits, + small_limits: RemovalLimits, + ) -> Vec<(Arc, String, Float)> { + // Helper to create weighted ruin combining normal and small limits (1:2 ratio, favoring small) + let create_weighted = |factory: fn(RemovalLimits) -> Arc| { + Arc::new(WeightedRuin::new(vec![(factory(normal_limits.clone()), 2), (factory(small_limits.clone()), 1)])) + as Arc + }; + vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(limits.clone())), format!("{prefix}_asr"), 7.), - (Arc::new(NeighbourRemoval::new(limits.clone())), format!("{prefix}_neighbour_removal"), 5.), - (Arc::new(WorstJobRemoval::new(4, limits.clone())), format!("{prefix}_worst_job"), 4.), - (Arc::new(RandomJobRemoval::new(limits.clone())), format!("{prefix}_random_job_removal"), 4.), - (Arc::new(RandomRouteRemoval::new(limits.clone())), format!("{prefix}_random_route_removal"), 2.), - (Arc::new(CloseRouteRemoval::new(limits.clone())), format!("{prefix}_close_route_removal"), 4.), - (Arc::new(WorstRouteRemoval::new(limits)), format!("{prefix}_worst_route_removal"), 5.), + ( + create_weighted(|limits| Arc::new(AdjustedStringRemoval::new_with_defaults(limits))), + "asr".to_string(), + 7., + ), + (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 5.), + (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 5.), + (create_weighted(|limits| Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 4.), + (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 4.), + (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 4.), + (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 2.), + (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 4.), ] } - fn get_ruins(problem: Arc) -> Vec<(Arc, String, Float)> { - vec![( - // TODO avoid unwrap - 
Arc::new(ClusterRemoval::new_with_defaults(problem.clone()).unwrap()), - "cluster_removal".to_string(), - 4., - )] - } - fn get_mutations( problem: Arc, environment: Arc, @@ -596,17 +602,11 @@ mod dynamic { // NOTE: consider checking usage of names within heuristic filter before changing them - let recreates = get_recreates(problem.as_ref(), random.clone()); - - let ruins = get_ruins_with_limits(normal_limits.clone(), "normal") - .into_iter() - .chain(get_ruins_with_limits(small_limits.clone(), "small")) - .chain(get_ruins(problem.clone())) - .collect::>(); - + let recreates = get_weighted_recreates(problem.as_ref(), random.clone()); + let ruins = get_weighted_ruins(problem.clone(), normal_limits.clone(), small_limits.clone()); let extra_random_job = Arc::new(RandomJobRemoval::new(small_limits)); - // NOTE we need to wrap any of ruin methods in composite which calls restore context before recreate + // Wrap ruins in composite which calls restore context before recreate let ruins = ruins .into_iter() .map::<(Arc, String, Float), _>(|(ruin, name, weight)| { @@ -614,11 +614,7 @@ mod dynamic { }) .collect::>(); - let mutations = get_mutations(problem.clone(), environment.clone()); - - let heuristic_filter = problem.extras.get_heuristic_filter(); - - recreates + let ruin_recreate_ops = recreates .iter() .flat_map(|(recreate, recreate_name, recreate_weight)| { ruins.iter().map::<(TargetSearchOperator, String, Float), _>(move |(ruin, ruin_name, ruin_weight)| { @@ -629,61 +625,39 @@ mod dynamic { ) }) }) + .collect::>(); + + let mutations = get_mutations(problem.clone(), environment.clone()); + let heuristic_filter = problem.extras.get_heuristic_filter(); + + ruin_recreate_ops + .into_iter() .chain(mutations) .filter(|(_, name, _)| heuristic_filter.as_ref().is_none_or(|filter| (filter)(name.as_str()))) .collect::>() } + /// Creates a default ruin-and-recreate operator for internal use (e.g., decompose search, infeasible search). + /// Uses weighted ruins and recreates from the main operator building logic. 
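
    // Scale note for the cross product above: every weighted recreate is paired
    // with every weighted ruin, so the portfolio grows multiplicatively and each
    // pair registers under a combined name, e.g. "cheapest" (1.0) with "asr" (7.0)
    // becomes "cheapest+asr" with weight 7.0. Keeping both lists short (as the
    // weighted-ruin merge above does) keeps the slot-machine count manageable.
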
pub fn create_default_inner_ruin_recreate( problem: Arc, environment: Arc, ) -> Arc { - let (_, small_limits) = get_limits(problem.as_ref()); + let (normal_limits, small_limits) = get_limits(problem.as_ref()); let random = environment.random.clone(); - // initialize recreate - let cheapest = Arc::new(RecreateWithCheapest::new(random.clone())); - let recreate = Arc::new(WeightedRecreate::new(vec![ - (cheapest.clone(), 1), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), 3), - (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), 1), - (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), 1), - (Arc::new(RecreateWithSkipBest::new(3, 4, random.clone())), 1), - (Arc::new(RecreateWithGaps::new(2, 20, random.clone())), 1), - (Arc::new(RecreateWithFarthest::new(random.clone())), 1), - (Arc::new(RecreateWithSlice::new(random.clone())), 1), - (Arc::new(RecreateWithSkipRandom::default_explorative_phased(cheapest, random.clone())), 1), - ])); + let recreates = get_weighted_recreates(problem.as_ref(), random.clone()); + let ruins = get_weighted_ruins(problem.clone(), normal_limits, small_limits); - // initialize ruin - let random_route = Arc::new(RandomRouteRemoval::new(small_limits.clone())); - let random_job = Arc::new(RandomJobRemoval::new(small_limits.clone())); - let random_ruin = Arc::new(WeightedRuin::new(vec![(random_job.clone(), 10), (random_route.clone(), 1)])); - let ruin = Arc::new(WeightedRuin::new(vec![ - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(AdjustedStringRemoval::new_with_defaults(small_limits.clone())), 5.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(NeighbourRemoval::new(small_limits.clone())), 1.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ( - Arc::new(CompositeRuin::new(vec![ - (Arc::new(WorstJobRemoval::new(4, small_limits)), 1.), - (random_ruin.clone(), 0.1), - ])), - 1, - ), - ])); + // Convert to weighted format (drop names, keep weights) + let weighted_ruins = ruins.into_iter().map(|(ruin, _, weight)| (ruin, weight as usize)).collect(); + let weighted_recreates = + recreates.into_iter().map(|(recreate, _, weight)| (recreate, weight as usize)).collect(); - Arc::new(RuinAndRecreate::new(ruin, recreate)) + Arc::new(RuinAndRecreate::new( + Arc::new(WeightedRuin::new(weighted_ruins)), + Arc::new(WeightedRecreate::new(weighted_recreates)), + )) } pub fn create_default_local_search(random: Arc) -> Arc { @@ -719,9 +693,12 @@ mod dynamic { } fn create_composite_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { - let limits = RemovalLimits { removed_activities_range: (10..100), affected_routes_range: 1..1 }; - let route_removal_operator = - Arc::new(RuinAndRecreate::new(Arc::new(RandomRouteRemoval::new(limits)), Arc::new(DummyRecreate))); + let limits = RemovalLimits { removed_activities_range: (10..100), affected_routes_range: 1..2 }; + let ruin = WeightedRuin::new(vec![ + (Arc::new(RandomRouteRemoval::new(limits.clone())), 1), + (Arc::new(WorstRouteRemoval::new(limits)), 1), + ]); + let route_removal_operator = Arc::new(RuinAndRecreate::new(Arc::new(ruin), Arc::new(DummyRecreate))); Arc::new(DecomposeSearch::new( Arc::new(CompositeHeuristicOperator::new(vec![ From bf82f5f7ca6ed2edffb7b712754be2f86c0febc0 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:45:54 +0100 Subject: [PATCH 16/30] Fix broken tests --- rosomaxa/src/hyper/dynamic_selective.rs | 4 +- .../unit/algorithms/rl/slot_machine_test.rs | 16 ++++--- 
.../unit/hyper/dynamic_selective_test.rs | 45 +++++++++++++++---- 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 3fec3874f..314c0427c 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -468,9 +468,7 @@ where O: HeuristicObjective, S: HeuristicSolution, { - let stats = search_ctx.heuristic_ctx.statistics(); - let improvement_ratio = stats.improvement_1000_ratio; - + let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio; let approx_median = &search_ctx.approx_median; let median = match approx_median { Some(m) if *m > 0 => *m as Float, diff --git a/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs b/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs index b8c2b3c5b..9fc616356 100644 --- a/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs +++ b/rosomaxa/tests/unit/algorithms/rl/slot_machine_test.rs @@ -1,6 +1,6 @@ use super::*; use crate::helpers::utils::create_test_random; -use crate::utils::{DefaultDistributionSampler, random_argmax}; +use crate::utils::DefaultDistributionSampler; #[derive(Clone)] struct TestAction(DefaultDistributionSampler); @@ -35,7 +35,7 @@ fn can_find_proper_estimations() { let slot_means: &[Float; 5] = &[5., 9., 7., 13., 11.]; let slot_vars: &[Float; 5] = &[2., 3., 4., 6., 1.]; let prior_mean = 1.; - let attempts = 1000; + let attempts_per_slot = 1000; let delta = 2.; let random = create_test_random(); @@ -44,11 +44,13 @@ fn can_find_proper_estimations() { .map(|_| SlotMachine::new(prior_mean, TestAction(sampler.clone()), sampler.clone())) .collect::>(); - for _ in 0..attempts { - let slot_idx = random_argmax(slots.iter().map(|slot| slot.sample()), random.as_ref()).unwrap(); - let slot = &mut slots[slot_idx]; - let feedback = slot.play((slot_means[slot_idx], slot_vars[slot_idx].sqrt())); - slot.update(&feedback); + // Play each slot independently to test estimation convergence + for slot_idx in 0..sockets { + for _ in 0..attempts_per_slot { + let slot = &mut slots[slot_idx]; + let feedback = slot.play((slot_means[slot_idx], slot_vars[slot_idx])); + slot.update(&feedback); + } } slots diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs index 814091be0..4d0449243 100644 --- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs +++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs @@ -56,19 +56,46 @@ fn can_estimate_median() { assert!(median > 0); } -parameterized_test! {can_estimate_reward_multiplier, (approx_median, duration, expected), { - can_estimate_reward_multiplier_impl(approx_median, duration, expected); +parameterized_test! {can_estimate_reward_multiplier, (improvement_ratio, approx_median, duration, expected), { + can_estimate_reward_multiplier_impl(improvement_ratio, approx_median, duration, expected); }} can_estimate_reward_multiplier! 
{ - case_01_moderate: (Some(1), 1, 2.0), // 1.0 (moderate) * 2.0 (stagnation bonus) - case_02_allegro: (Some(2), 1, 3.0), // 1.5 (fast) * 2.0 (stagnation bonus) - case_03_allegretto: (Some(10), 8, 2.5), // 1.25 (fast-ish) * 2.0 (stagnation bonus) - case_04_andante: (Some(8), 13, 1.5), // 0.75 (slow) * 2.0 (stagnation bonus) + case_01_fast_with_improvement: (0.1, Some(10), 5, 1.104), // Fast operator during improvement: ln(2.0)*0.15*1.0 ≈ 0.104 + case_02_slow_with_improvement: (0.1, Some(10), 20, 0.896), // Slow operator during improvement: ln(0.5)*0.15*1.0 ≈ -0.104 + case_03_fast_partial_improvement: (0.05, Some(10), 5, 1.052), // Fast with 50% damping: ln(2.0)*0.15*0.5 ≈ 0.052 + case_04_no_improvement: (0.0, Some(10), 5, 1.0), // No improvement = no bonus + case_05_no_median: (0.1, None, 5, 1.0), // No median = baseline + case_06_clamped_fast: (0.1, Some(10), 1, 1.2), // Very fast but clamped to PERF_TOLERANCE (0.2) + case_07_clamped_slow: (0.1, Some(10), 100, 0.8), // Very slow but clamped to -PERF_TOLERANCE (-0.2) } -fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: usize, expected: Float) { - let heuristic_ctx = create_default_heuristic_context(); +fn can_estimate_reward_multiplier_impl( + improvement_ratio: Float, + approx_median: Option, + duration: usize, + expected: Float, +) { + // Create a mock context with the specified improvement ratio + let mut heuristic_ctx = create_default_heuristic_context(); + + // Simulate improvement ratio by triggering generations + if improvement_ratio > 0.0 { + let num_improvements = (improvement_ratio * 1000.0) as usize; + + // Add improving solutions + for i in 0..num_improvements { + let solution = VectorSolution::new(vec![], -(i as Float), vec![]); + heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start()); + } + + // Add non-improving solutions + for _ in num_improvements..1000 { + let solution = VectorSolution::new(vec![], 100.0, vec![]); + heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start()); + } + } + let solution = VectorSolution::new(vec![], 0., vec![]); let search_ctx = SearchContext { heuristic_ctx: &heuristic_ctx, @@ -80,7 +107,7 @@ fn can_estimate_reward_multiplier_impl(approx_median: Option, duration: u let result = estimate_reward_perf_multiplier(&search_ctx, duration); - assert_eq!(result, expected); + assert!((result - expected).abs() < 0.001, "Expected {expected}, got {result}"); } #[test] From 0de8b355d525fbdff45812d478968169a2470bf4 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:47:20 +0100 Subject: [PATCH 17/30] Update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5b330fad..7020d457f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ are already published. So, I stick to it for now. 
* improved remedian algorithm * separate calculations for distance/duration from cost minimization * change GSOM distance function -* improved SISR implementation +* improve SISR implementation +* improve dynamic selective heuristic ### Added From ba8e3bae59166b6c505ee5e67588ee4ab0f6476a Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Sun, 11 Jan 2026 21:54:05 +0100 Subject: [PATCH 18/30] Fix clippy warnings --- rosomaxa/src/algorithms/rl/slot_machine.rs | 2 +- rosomaxa/src/hyper/dynamic_selective.rs | 12 +----------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index cb314b0fe..32f20aa96 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -62,7 +62,7 @@ where let beta = 1.0; // Prior mean is clamped to a reasonable Z-score range (-0 to 10) - let mu = prior_mean.max(0.01).min(10.0); + let mu = prior_mean.clamp(0.01, 10.0); // Variance expectation v = Beta / (Alpha - 1) = 1.0. // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 314c0427c..f3a593f06 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -233,21 +233,11 @@ where let median = context.approx_median.unwrap_or(duration).max(1) as Float; let ratio = duration as Float / median; - // Logic: - // Ratio 0.2 (Fast) -> -0.01 * 0.2 = -0.002 (Too small, clamp to MIN) -> -0.01 - // Ratio 1.0 (Avg) -> -0.01 * 1.0 = -0.01 - // Ratio 5.0 (Slow) -> -0.01 * 5.0 = -0.05 - // Ratio 10.0 (Slower) -> -0.01 * 10.0 = -0.1 (At MAX boundary) - // Ratio 20.0 (Very Slow) -> -0.01 * 20.0 = -0.2 (Clamp to MAX) -> -0.1 - // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". let raw_penalty = SearchRewards::PENALTY_MIN * ratio; // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. - // Note: min/max semantics with negative numbers: - // - max(PENALTY_MAX) ensures we don't go below -0.1 (more negative). - // - min(PENALTY_MIN) ensures we don't go above -0.01 (less negative). - raw_penalty.max(SearchRewards::PENALTY_MAX).min(SearchRewards::PENALTY_MIN) + raw_penalty.clamp(SearchRewards::PENALTY_MAX, SearchRewards::PENALTY_MIN) } }; From a8328610a5724e2c13a92d5487557898bbb1efed Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Mon, 12 Jan 2026 22:59:15 +0100 Subject: [PATCH 19/30] Improve UI for heuristic playground --- experiments/heuristic-research/www/index.html | 163 ++++--- experiments/heuristic-research/www/index.js | 297 ++++++++---- experiments/heuristic-research/www/style.css | 432 +++++++++++++----- 3 files changed, 644 insertions(+), 248 deletions(-) diff --git a/experiments/heuristic-research/www/index.html b/experiments/heuristic-research/www/index.html index a37846797..d7ac391c9 100644 --- a/experiments/heuristic-research/www/index.html +++ b/experiments/heuristic-research/www/index.html @@ -10,106 +10,149 @@
 [editor's note: the body of this index.html hunk did not survive extraction — the markup
 was stripped, leaving bare "+"/"-" markers. Recoverable content: the page title changes
 from "Heuristic Research Playground" to "🔬 Heuristic Research Playground", and a
 "Loading WebAssembly..." status line is retained. Judging from the index.js changes in
 this same patch, the rewritten markup adds a benchmark-type selector (replacing the
 vertical Function/VRP tab controls), live value labels for the pitch/yaw sliders, a
 generation control with current/max generation labels, a max-generations input,
 auto/manual initial-point controls, and a status box.]
diff --git a/experiments/heuristic-research/www/index.js b/experiments/heuristic-research/www/index.js index 6c34caa6b..a1dcab44f 100644 --- a/experiments/heuristic-research/www/index.js +++ b/experiments/heuristic-research/www/index.js @@ -7,27 +7,33 @@ const bestCanvas = document.getElementById("bestCanvas"); const durationCanvas = document.getElementById("durationCanvas"); const fitnessCanvas = document.getElementById("fitnessCanvas"); -const coordLabel = document.getElementById("coordLabel"); +const benchmarkType = document.getElementById("benchmarkType"); +const functionControls = document.getElementById("functionControls"); +const vrpControls = document.getElementById("vrpControls"); const fileSelector = document.getElementById("fileSelector"); const plotPopulation = document.getElementById("plotPopulation"); const plotFunction = document.getElementById("plotFunction"); const vrpFormat = document.getElementById("vrpFormat"); const pitch = document.getElementById("pitch"); const yaw = document.getElementById("yaw"); +const pitchValue = document.getElementById("pitchValue"); +const yawValue = document.getElementById("yawValue"); const status = document.getElementById("status"); const run = document.getElementById("run"); const generations = document.getElementById("generations"); +const generationControl = document.getElementById("generationControl"); +const currentGen = document.getElementById("currentGen"); +const maxGen = document.getElementById("maxGen"); +const maxGenerations = document.getElementById("maxGenerations"); +const autoInitPoint = document.getElementById("autoInitPoint"); +const manualPointControls = document.getElementById("manualPointControls"); +const initX = document.getElementById("initX"); +const initZ = document.getElementById("initZ"); /** Main entry point */ export function main() { setupListeners(); - setupCanvas(solutionCanvas, 800); - setupCanvas(searchCanvas, 800); - setupCanvas(overallCanvas, 800); - setupCanvas(bestCanvas, 800); - setupCanvas(durationCanvas, 800); - setupCanvas(fitnessCanvas, 800); - + resizeAllCanvases(); updateDynamicPlots(); updateStaticPlots(); } @@ -43,27 +49,36 @@ export function setup(WasmChart, run_function_experiment, run_vrp_experiment, lo /** Add event listeners. 
*/ function setupListeners() { - status.innerText = "WebAssembly loaded!"; + status.innerText = "✓ WebAssembly loaded!"; + status.classList.add('success'); + + benchmarkType.addEventListener("change", switchBenchmarkType); fileSelector.addEventListener("change", openFile); plotFunction.addEventListener("change", changePlot); plotPopulation.addEventListener("change", changePlot); + autoInitPoint.addEventListener("change", toggleInitPointMode); yaw.addEventListener("change", updatePlots); pitch.addEventListener("change", updatePlots); generations.addEventListener("change", updatePlots); - yaw.addEventListener("input", updatePlots); - pitch.addEventListener("input", updatePlots); - generations.addEventListener("input", updatePlots); - - run.addEventListener("click", runExperiment) - window.addEventListener("resize", setupCanvas); + yaw.addEventListener("input", (e) => { + updateSliderValue(e.target, yawValue); + updatePlots(); + }); + pitch.addEventListener("input", (e) => { + updateSliderValue(e.target, pitchValue); + updatePlots(); + }); + generations.addEventListener("input", (e) => { + currentGen.innerText = e.target.value; + updatePlots(); + }); - // setup vertical tabs buttons - ['function', 'vrp'].forEach(function(type) { - document.getElementById(type + 'TabButton').addEventListener("click", function(evt) { - openTab(evt, 'controlTab', type + 'Tab', '-vert'); - }); + run.addEventListener("click", runExperiment); + window.addEventListener("resize", () => { + resizeAllCanvases(); + updatePlots(); }); // setup horizontal tab buttons @@ -74,7 +89,6 @@ function setupListeners() { }); // open default tabs - document.getElementById("functionTabButton").click(); document.getElementById("solutionTabButton").click(); // allow to control generation range using left-right arrows @@ -82,34 +96,129 @@ function setupListeners() { switch (event.key) { case "ArrowLeft": generations.value = Math.max(parseInt(generations.value) - 1, parseInt(generations.min)); + currentGen.innerText = generations.value; updatePlots(); break; case "ArrowRight": generations.value = Math.min(parseInt(generations.value) + 1, parseInt(generations.max)); + currentGen.innerText = generations.value; updatePlots(); break; } }); + + // Initialize slider values + updateSliderValue(pitch, pitchValue); + updateSliderValue(yaw, yawValue); + + // Set initial ranges for function + updateInitPointRanges(); } /** Setup canvas to properly handle high DPI and redraw current plot. 
*/ -function setupCanvas(canvas, size) { - if (canvas.style === undefined) { +function setupCanvas(canvas) { + if (!canvas || !canvas.style) { return; } - const aspectRatio = canvas.width / canvas.height; - //const size = canvas.parentNode.offsetWidth * 1.2; - canvas.style.width = size + "px"; - canvas.style.height = size / aspectRatio + "px"; - canvas.width = size; - canvas.height = size / aspectRatio; + const container = canvas.parentNode; + const containerWidth = container.offsetWidth - 20; // subtract padding + const originalAspectRatio = canvas.width / canvas.height; + + // Set display size (CSS pixels) + const displayWidth = Math.min(containerWidth, 950); + const displayHeight = displayWidth / originalAspectRatio; + + canvas.style.width = displayWidth + "px"; + canvas.style.height = displayHeight + "px"; +} + +/** Resize all canvases */ +function resizeAllCanvases() { + [solutionCanvas, searchCanvas, overallCanvas, bestCanvas, durationCanvas, fitnessCanvas].forEach(canvas => { + setupCanvas(canvas); + }); +} + +/** Update slider display value */ +function updateSliderValue(slider, display) { + const value = (Number(slider.value) / 100.0).toFixed(2); + display.innerText = value; +} + +/** Switch between Function and VRP benchmark types */ +function switchBenchmarkType() { + const type = benchmarkType.value; + if (type === 'function') { + functionControls.classList.remove('hide'); + vrpControls.classList.add('hide'); + } else { + functionControls.classList.add('hide'); + vrpControls.classList.remove('hide'); + + // Show message if no file loaded + if (!Chart.data) { + status.innerText = 'Please load a VRP problem file'; + status.classList.remove('success', 'loading'); + } + } + changePlot(); +} + +/** Toggle between automatic and manual initial point selection */ +function toggleInitPointMode() { + if (autoInitPoint.checked) { + manualPointControls.classList.add('hide'); + } else { + manualPointControls.classList.remove('hide'); + } +} + +/** Update initial point input ranges based on selected function */ +function updateInitPointRanges() { + const functionName = plotFunction.value; + let min, max; + + switch(functionName) { + case 'rosenbrock': + min = -2.0; max = 2.0; + break; + case 'rastrigin': + min = -5.12; max = 5.12; + break; + case 'himmelblau': + min = -5.0; max = 5.0; + break; + case 'ackley': + min = -5.0; max = 5.0; + break; + case 'matyas': + min = -10.0; max = 10.0; + break; + default: + min = -5.0; max = 5.0; + } + + initX.min = min; + initX.max = max; + initZ.min = min; + initZ.max = max; + + // Reset to center if out of range + if (parseFloat(initX.value) < min || parseFloat(initX.value) > max) { + initX.value = 0; + } + if (parseFloat(initZ.value) < min || parseFloat(initZ.value) > max) { + initZ.value = 0; + } } /** Changes plot **/ function changePlot() { Chart.clear(); - generations.classList.add("hide"); + generationControl.classList.add("hide"); + currentGen.innerText = "0"; + updateInitPointRanges(); updatePlots(); } @@ -123,6 +232,11 @@ function openFile(event) { Chart.data = content; run.classList.remove("hide"); + + // Update status to show file loaded + status.innerText = `✓ File loaded: ${input.files[0].name}`; + status.classList.add('success'); + status.classList.remove('loading'); }; reader.readAsText(input.files[0]); } @@ -139,10 +253,8 @@ function updateDynamicPlots(run) { let population_type = plotPopulation.selectedOptions[0].value; let heuristic_kind = "best"; - coordLabel.innerText = `Rotation: pitch=${pitch_value}, yaw=${yaw_value}` - - // TODO 
configure parameters from outside - let max_gen = 2000 + // Get max generations from user input + let max_gen = parseInt(maxGenerations.value); const start = performance.now(); switch (getExperimentType()) { @@ -172,29 +284,36 @@ function updateDynamicPlots(run) { if (run) { let function_name = plotFunction.selectedOptions[0].value; var x = 0.0, z = 0.0; - switch(function_name) { - case 'rosenbrock': - x = getRandomInRange(-2.0, 2.0) - z = getRandomInRange(-2.0, 2.0) - break; - case 'rastrigin': - x = getRandomInRange(-5.12, 5.12) - z = getRandomInRange(-5.12, 5.12) - break; - case 'himmelblau': - x = getRandomInRange(-5.0, 5.0) - z = getRandomInRange(-5.0, 5.0) - break; - case 'ackley': - x = getRandomInRange(-5.0, 5.0) - z = getRandomInRange(-5.0, 5.0) - break; - case 'matyas': - x = getRandomInRange(-10.0, 10.0) - z = getRandomInRange(-10.0, 10.0) - break; - default: - break; + + // Use manual point if checkbox is unchecked, otherwise random + if (!autoInitPoint.checked) { + x = parseFloat(initX.value); + z = parseFloat(initZ.value); + } else { + switch(function_name) { + case 'rosenbrock': + x = getRandomInRange(-2.0, 2.0) + z = getRandomInRange(-2.0, 2.0) + break; + case 'rastrigin': + x = getRandomInRange(-5.12, 5.12) + z = getRandomInRange(-5.12, 5.12) + break; + case 'himmelblau': + x = getRandomInRange(-5.0, 5.0) + z = getRandomInRange(-5.0, 5.0) + break; + case 'ackley': + x = getRandomInRange(-5.0, 5.0) + z = getRandomInRange(-5.0, 5.0) + break; + case 'matyas': + x = getRandomInRange(-10.0, 10.0) + z = getRandomInRange(-10.0, 10.0) + break; + default: + break; + } } console.log(`init point is: (${x}, ${z})`) @@ -213,24 +332,35 @@ function updateDynamicPlots(run) { } } - Chart.vrp(solutionCanvas, generation_value, pitch_value, yaw_value); + // Only render if data has been loaded + if (Chart.data) { + Chart.vrp(solutionCanvas, generation_value, pitch_value, yaw_value); + } break; } } - Chart.search_iteration(searchCanvas, generation_value, heuristic_kind); - Chart.search_best_statistics(bestCanvas, generation_value, heuristic_kind); - Chart.search_duration_statistics(durationCanvas, generation_value, heuristic_kind); - Chart.search_overall_statistics(overallCanvas, generation_value, heuristic_kind); + // Only render statistics if there's data to display + if (Chart.data || getExperimentType() === 'function') { + Chart.search_iteration(searchCanvas, generation_value, heuristic_kind); + Chart.search_best_statistics(bestCanvas, generation_value, heuristic_kind); + Chart.search_duration_statistics(durationCanvas, generation_value, heuristic_kind); + Chart.search_overall_statistics(overallCanvas, generation_value, heuristic_kind); + } const end = performance.now(); if (run) { generations.max = max_gen; - generations.classList.remove("hide"); + maxGen.innerText = max_gen; + currentGen.innerText = "0"; + generations.value = "0"; + generationControl.classList.remove("hide"); } - status.innerText = `Generation: ${generation_value} rendered in ${Math.ceil(end - start)}ms`; + status.innerText = `Generation: ${generation_value} | Rendered in ${Math.ceil(end - start)}ms`; + status.classList.remove('loading'); + status.classList.add('success'); } function updateStaticPlots() { @@ -239,15 +369,29 @@ function updateStaticPlots() { Chart.fitness_func(fitnessCanvas); break; case 'vrp': - Chart.fitness_vrp(fitnessCanvas) + // Only render if data has been loaded + if (Chart.data) { + Chart.fitness_vrp(fitnessCanvas); + } break; } } /** Runs experiment. 
*/ function runExperiment() { - updateDynamicPlots(true); - updateStaticPlots(true); + run.disabled = true; + run.innerHTML = '⏳ Running...'; + status.innerText = 'Running experiment...'; + status.classList.add('loading'); + status.classList.remove('success'); + + // Use setTimeout to allow UI to update + setTimeout(() => { + updateDynamicPlots(true); + updateStaticPlots(true); + run.disabled = false; + run.innerHTML = '▶ Run Experiment'; + }, 50); } function updatePlots() { @@ -256,24 +400,7 @@ function updatePlots() { } function getExperimentType() { - const buttons = document.querySelectorAll('.tablinks-vert'); - for (const button of buttons) { - if (button.classList.contains('active')) { - switch (button.textContent) { - case 'Function Bench': - return 'function' - case 'VRP Bench': - return 'vrp' - default: - console.error("unknown experiment type: '" + button.textContent + "'"); - return 'function' - } - } - } - - console.error("no active tab detected"); - - return 'function'; + return benchmarkType.value; } function openTab(evt, containerId, tabId, suffix) { diff --git a/experiments/heuristic-research/www/style.css b/experiments/heuristic-research/www/style.css index b22d5cf22..f3e4fa2e7 100644 --- a/experiments/heuristic-research/www/style.css +++ b/experiments/heuristic-research/www/style.css @@ -1,20 +1,44 @@ +/* CSS Variables for easy theming */ +:root { + --primary-color: #0366d6; + --primary-hover: #0256c5; + --secondary-color: #586069; + --background: #ffffff; + --surface: #f6f8fa; + --border: #e1e4e8; + --text-primary: #24292e; + --text-secondary: #586069; + --success: #28a745; + --warning: #ffa500; + --shadow: rgba(0, 0, 0, 0.1); + --transition: all 0.3s ease; +} + html, body, main { width: 100%; margin: 0; padding: 0; } +* { + box-sizing: border-box; +} + body { margin: auto; - max-width: 840px; + max-width: 1000px; display: flex; flex-direction: column; + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; + color: var(--text-primary); + background-color: var(--background); + line-height: 1.6; + padding: 10px; } -@media (max-width: 800px) { +@media (max-width: 768px) { body { - padding: 10px; - box-sizing: border-box; + padding: 5px; } } @@ -22,187 +46,389 @@ main { display: flex; flex-direction: column; align-items: center; + padding: 0; +} + +h1 { + font-size: 1.5rem; + font-weight: 600; + margin: 10px 0 15px 0; + color: var(--text-primary); + text-align: center; } footer { - margin-top: 2em; + margin-top: 1.5em; + padding: 1em 0; font-size: 12px; text-align: center; + border-top: 1px solid var(--border); + color: var(--text-secondary); +} + +footer a { + color: var(--primary-color); + text-decoration: none; + margin: 0 8px; + transition: var(--transition); +} + +footer a:hover { + text-decoration: underline; } label, #coordLabel, #status { - color: grey; - font-size: 10px; - height: 15px + color: var(--text-secondary); + font-size: 11px; + font-weight: 500; + height: auto; + margin-bottom: 4px; + display: block; +} + +/* Control panels */ +.controls-panel { + display: flex; + flex-direction: column; + gap: 8px; +} + +.control-group { + margin-bottom: 0; +} + +.control-group label { + display: flex; + align-items: center; +} + +.control-row { + display: flex; + gap: 10px; +} + +.flex-1 { + flex: 1; +} + +.benchmark-controls { + display: flex; + flex-direction: column; + gap: 8px; +} + +input[type="checkbox"] { + width: 16px; + height: 16px; + cursor: pointer; + accent-color: var(--primary-color); +} + 
+input[type="number"] { + font-family: inherit; + color: var(--text-primary); +} + +input[type="number"]:focus { + outline: none; + border-color: var(--primary-color) !important; + box-shadow: 0 0 0 3px rgba(3, 102, 214, 0.1); +} + +.action-panel { + justify-content: flex-start; + min-width: 200px; + gap: 10px; +} + +.run-button { + flex-shrink: 0; +} + +.status-box { + flex-shrink: 0; } #run { display: block; - margin-right: auto; - margin-left: auto; + margin: 0 auto; } .hide { - visibility: hidden; - height: 0px; + display: none !important; } .row { display: flex; - width: 840px; + width: 100%; + gap: 10px; + margin-bottom: 10px; +} + +@media (max-width: 768px) { + .row { + flex-direction: column; + gap: 8px; + } } .column { - flex: 50%; - padding: 8px; + flex: 1; + padding: 12px; + background: var(--surface); + border-radius: 6px; + border: 1px solid var(--border); } .block { display: block; width: 100%; border: none; - background-color: #00b7e4; - padding: 14px 28px; - font-size: 16px; + background-color: var(--primary-color); + color: white; + padding: 12px 24px; + font-size: 15px; + font-weight: 600; cursor: pointer; text-align: center; + border-radius: 6px; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -/* Change background color of buttons on hover */ .block:hover { - background-color: #0ecfff; + background-color: var(--primary-hover); + box-shadow: 0 2px 6px var(--shadow); + transform: translateY(-1px); +} + +.block:active { + transform: translateY(0); + box-shadow: 0 1px 2px var(--shadow); } -* {box-sizing: border-box} +.block:disabled { + opacity: 0.6; + cursor: not-allowed; + transform: none; +} /* Horizontal Tabs */ .tab { - overflow: hidden; - border: 1px solid #ccc; - background-color: #f1f1f1; + display: flex; + flex-wrap: wrap; + gap: 3px; + background-color: var(--surface); + padding: 6px; + border-radius: 6px 6px 0 0; + border: 1px solid var(--border); + border-bottom: none; } -/* Style the buttons that are used to open the tab content */ .tab button { - background-color: inherit; - float: left; + background-color: transparent; border: none; outline: none; cursor: pointer; - padding: 14px 16px; - transition: 0.3s; + padding: 8px 12px; + font-size: 12px; + font-weight: 500; + color: var(--text-secondary); + border-radius: 4px; + transition: var(--transition); + white-space: nowrap; } -/* Change background color of buttons on hover */ .tab button:hover { - background-color: #ddd; + background-color: rgba(3, 102, 214, 0.1); + color: var(--primary-color); } -/* Create an active/current tablink class */ .tab button.active { - background-color: #ccc; + background-color: white; + color: var(--primary-color); + box-shadow: 0 1px 3px var(--shadow); } -/* Style the tab content */ .tabcontent { display: none; - padding: 6px 12px; - border: 1px solid #ccc; + padding: 10px; + background: white; + border: 1px solid var(--border); border-top: none; + border-radius: 0 0 6px 6px; +} + +.tabcontent canvas { + display: block; + margin: 0 auto; + max-width: 100%; + width: 100%; + height: auto; + border-radius: 4px; } -/* Vertical Tab */ +/* Select */ +.select-wrap { + border: 1px solid var(--border); + border-radius: 6px; + padding: 6px 10px; + width: 100%; + background-color: white; + position: relative; + margin-bottom: 10px; + transition: var(--transition); +} -.tab-vert { - float: left; - border: 1px solid #ccc; - background-color: #f1f1f1; - width: 30%; - height: 200px; +.select-wrap:focus-within { + border-color: var(--primary-color); + box-shadow: 0 0 0 
3px rgba(3, 102, 214, 0.1); } -/* Style the buttons that are used to open the tab content */ -.tab-vert button { - display: block; - background-color: inherit; - color: black; - padding: 22px 16px; +.select-wrap label { + font-size: 9px; + text-transform: uppercase; + color: var(--text-secondary); + padding: 0; + position: absolute; + top: 6px; + left: 10px; + font-weight: 600; + letter-spacing: 0.5px; +} + +select { + background-color: transparent; + border: 0; + height: 40px; + font-size: 14px; + color: var(--text-primary); width: 100%; - border: none; + padding-top: 14px; + cursor: pointer; outline: none; - text-align: left; +} + +select option { + padding: 10px; +} + +/* File uploader */ +.input-container { + border: 1px solid var(--border); + border-radius: 6px; + overflow: hidden; + background: white; +} + +input[type=file] { + width: 100%; + font-size: 12px; + color: var(--text-primary); +} + +input[type=file]::file-selector-button { + background-color: var(--primary-color); + color: white; + border: 0; + border-right: 1px solid var(--border); + padding: 8px 12px; + margin-right: 10px; + font-weight: 600; + font-size: 12px; cursor: pointer; - transition: 0.3s; + transition: var(--transition); } -/* Change background color of buttons on hover */ -.tab-vert button:hover { - background-color: #ddd; +input[type=file]::file-selector-button:hover { + background-color: var(--primary-hover); } -/* Create an active/current "tab button" class */ -.tab-vert button.active { - background-color: #ccc; +/* Range inputs (sliders) */ +input[type=range] { + width: 100%; + height: 4px; + border-radius: 2px; + background: var(--border); + outline: none; + margin: 4px 0 0 0; + -webkit-appearance: none; } -/* Style the tab content */ -.tabcontent-vert { - padding: 8px; - float: left; - border: 1px solid #ccc; - width: 70%; - border-left: none; - height: 200px; +input[type=range]::-webkit-slider-thumb { + -webkit-appearance: none; + appearance: none; + width: 14px; + height: 14px; + border-radius: 50%; + background: var(--primary-color); + cursor: pointer; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -#commonControls { - padding: 16px; +input[type=range]::-webkit-slider-thumb:hover { + background: var(--primary-hover); + transform: scale(1.1); } -/* Select */ -.select-wrap { - border: 1px solid #777; - border-radius: 4px; - padding: 0 5px; - width: 140px; - background-color:#fff; - position:relative; +input[type=range]::-moz-range-thumb { + width: 14px; + height: 14px; + border-radius: 50%; + background: var(--primary-color); + cursor: pointer; + border: none; + transition: var(--transition); + box-shadow: 0 1px 3px var(--shadow); } -.select-wrap label{ - font-size:8px; - text-transform: uppercase; - color: #777; - padding: 0 8px; - position: absolute; - top:6px; + +input[type=range]::-moz-range-thumb:hover { + background: var(--primary-hover); + transform: scale(1.1); } -select{ - background-color: #fff; - border:0px; - height:50px; - font-size: 16px; +/* Loading spinner */ +.spinner { + display: inline-block; + width: 16px; + height: 16px; + border: 2px solid rgba(255, 255, 255, 0.3); + border-radius: 50%; + border-top-color: white; + animation: spin 0.8s linear infinite; + margin-left: 8px; + vertical-align: middle; } -/* file uploader */ +@keyframes spin { + to { transform: rotate(360deg); } +} -.input-container { - border: 1px solid #e5e5e5; +/* Status indicator */ +#status { + padding: 8px 12px; + background: var(--surface); + border-radius: 4px; + text-align: center; + 
margin: 0; + font-size: 11px; + border: 1px solid var(--border); } -input[type=file]::file-selector-button { - background-color: #00b7e4; - color: #000; - border: 0px; - border-right: 1px solid #e5e5e5; - padding: 10px 15px; - margin-right: 20px; - transition: .5s; +#status.loading { + color: var(--warning); + border-color: var(--warning); } -input[type=file]::file-selector-button:hover { - background-color: #0ecfff; - border: 0px; - border-right: 1px solid #e5e5e5; +#status.success { + color: var(--success); + border-color: var(--success); +} + +/* Canvas container */ +#canvasTab { + width: 100%; + margin-top: 10px; } \ No newline at end of file From 07bad8a22b7a41abbf1ee73f41cec199e7f07b7f Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 13:22:30 +0100 Subject: [PATCH 20/30] Support loading of raw experimental output for dynamic heuristic --- experiments/heuristic-research/src/lib.rs | 1 + .../heuristic-research/src/plots/mod.rs | 26 +++++- .../heuristic-research/src/solver/proxies.rs | 25 ++++++ .../heuristic-research/src/solver/state.rs | 51 ++++++------ .../src/solver/vrp/population.rs | 6 +- experiments/heuristic-research/www/index.js | 9 +++ rosomaxa/src/hyper/dynamic_selective.rs | 79 ++++++++++++++++--- 7 files changed, 155 insertions(+), 42 deletions(-) diff --git a/experiments/heuristic-research/src/lib.rs b/experiments/heuristic-research/src/lib.rs index f92a15ace..8e305e557 100644 --- a/experiments/heuristic-research/src/lib.rs +++ b/experiments/heuristic-research/src/lib.rs @@ -126,6 +126,7 @@ pub fn run_vrp_experiment(format_type: &str, problem: &str, population_type: &st /// Loads experiment data from json serialized representation. #[wasm_bindgen] pub fn load_state(data: &str) -> usize { + set_panic_hook_once(); match ExperimentData::try_from(data) { Ok(data) => *EXPERIMENT_DATA.lock().unwrap() = data, Err(err) => web_sys::console::log_1(&err.into()), diff --git a/experiments/heuristic-research/src/plots/mod.rs b/experiments/heuristic-research/src/plots/mod.rs index 7d82726d9..33c35223b 100644 --- a/experiments/heuristic-research/src/plots/mod.rs +++ b/experiments/heuristic-research/src/plots/mod.rs @@ -247,7 +247,9 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let names_rev = data.heuristic_state.names.iter().map(|(k, v)| (*v, k)).collect::>(); let _states_rev = data.heuristic_state.states.iter().map(|(k, v)| (*v, k)).collect::>(); - data.heuristic_state.search_states.get(&generation).map(|states| { + // Find nearest available generation if exact match doesn't exist + let nearest_gen = find_nearest_generation(&data.heuristic_state.search_states, generation); + data.heuristic_state.search_states.get(&nearest_gen).map(|states| { let estimations = states .iter() // NOTE: just show all transitions @@ -264,7 +266,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { get_search_statistics( &data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |SearchResult(_, _, (_, to_state_idx), _)| to_state_idx == best_state_idx, |acc, SearchResult(name_idx, ..)| { acc[*name_idx] += 1; @@ -276,7 +278,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let overall = get_search_statistics( &data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |_| true, |acc, SearchResult(name_idx, ..)| { acc[*name_idx] += 1; @@ -287,7 +289,7 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { let durations = get_search_statistics( 
&data.heuristic_state.search_states, &names_rev, - generation, + nearest_gen, |_| true, |acc: &mut Vec<(usize, usize)>, SearchResult(name_idx, _, _, duration)| { let (total, count) = (acc[*name_idx].0, acc[*name_idx].1); @@ -305,6 +307,22 @@ fn get_search_config(generation: usize, kind: &str) -> SearchDrawConfig { .unwrap_or_default() } +/// Finds the nearest available generation that is <= the requested generation. +/// Returns the requested generation if it exists, otherwise the closest smaller one. +fn find_nearest_generation(generations: &HashMap>, requested: usize) -> usize { + if generations.contains_key(&requested) { + return requested; + } + + // Find the largest generation that is smaller than requested + generations + .keys() + .filter(|&&g| g <= requested) + .copied() + .max() + .unwrap_or(requested) +} + fn get_search_statistics( search_states: &HashMap>, names_rev: &HashMap, diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index fd705b06f..197c2d3fd 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -41,6 +41,31 @@ impl<'a> TryFrom<&'a str> for ExperimentData { type Error = String; fn try_from(value: &'a str) -> Result { + // Check if this is telemetry CSV format (contains "TELEMETRY" somewhere in the content) + // Extract telemetry section if present, otherwise try JSON + if let Some(telemetry_start) = value.find("TELEMETRY") { + // Extract everything from TELEMETRY onwards + let telemetry_content = &value[telemetry_start..]; + + // Parse telemetry CSV using existing parser + let heuristic_state = HyperHeuristicState::try_parse_all(telemetry_content) + .ok_or_else(|| "Failed to parse telemetry data".to_string())?; + + // Find max generation from telemetry data + let max_generation = heuristic_state.search_states.keys() + .chain(heuristic_state.heuristic_states.keys()) + .copied() + .max() + .unwrap_or(0); + + let mut experiment_data = ExperimentData::default(); + experiment_data.heuristic_state = heuristic_state; + experiment_data.generation = max_generation; + + return Ok(experiment_data); + } + + // Try parsing as JSON serde_json::from_str(value).map_err(|err| format!("cannot deserialize experiment data: {err}")) } } diff --git a/experiments/heuristic-research/src/solver/state.rs b/experiments/heuristic-research/src/solver/state.rs index fdb68f9cc..f2eff096d 100644 --- a/experiments/heuristic-research/src/solver/state.rs +++ b/experiments/heuristic-research/src/solver/state.rs @@ -172,31 +172,32 @@ impl HyperHeuristicState { .values_mut() .for_each(|states| states.sort_by(|SearchResult(a, ..), SearchResult(b, ..)| a.cmp(b))); - let mut heuristic_states = data.lines().skip_while(|line| *line != "heuristic:").skip(2).fold( - HashMap::<_, Vec<_>>::new(), - |mut data, line| { - let fields: Vec = line.split(',').map(|s| s.to_string()).collect(); - - let generation: usize = fields[0].parse().unwrap(); - let state = fields[1].clone(); - let name = fields[2].clone(); - let alpha = fields[3].parse().unwrap(); - let beta = fields[4].parse().unwrap(); - let mu = fields[5].parse().unwrap(); - let v = fields[6].parse().unwrap(); - let n = fields[7].parse().unwrap(); - - insert_to_map(&mut states, state.clone()); - insert_to_map(&mut names, name.clone()); - - let state = states.get(&state).copied().unwrap(); - let name = names.get(&name).copied().unwrap(); - - data.entry(generation).or_default().push(HeuristicResult(state, name, alpha, beta, 
mu, v, n)); - - data - }, - ); + let mut heuristic_states = + data.lines().skip_while(|line| *line != "heuristic:").skip(2).take_while(|line| !line.is_empty()).fold( + HashMap::<_, Vec<_>>::new(), + |mut data, line| { + let fields: Vec = line.split(',').map(|s| s.to_string()).collect(); + + let generation: usize = fields[0].parse().unwrap(); + let state = fields[1].clone(); + let name = fields[2].clone(); + let alpha = fields[3].parse().unwrap(); + let beta = fields[4].parse().unwrap(); + let mu = fields[5].parse().unwrap(); + let v = fields[6].parse().unwrap(); + let n = fields[7].parse().unwrap(); + + insert_to_map(&mut states, state.clone()); + insert_to_map(&mut names, name.clone()); + + let state = states.get(&state).copied().unwrap(); + let name = names.get(&name).copied().unwrap(); + + data.entry(generation).or_default().push(HeuristicResult(state, name, alpha, beta, mu, v, n)); + + data + }, + ); heuristic_states .values_mut() .for_each(|states| states.sort_by(|HeuristicResult(_, a, ..), HeuristicResult(_, b, ..)| a.cmp(b))); diff --git a/experiments/heuristic-research/src/solver/vrp/population.rs b/experiments/heuristic-research/src/solver/vrp/population.rs index c14f5c663..ee485d4a8 100644 --- a/experiments/heuristic-research/src/solver/vrp/population.rs +++ b/experiments/heuristic-research/src/solver/vrp/population.rs @@ -10,7 +10,7 @@ pub fn get_population_fitness_fn(generation: usize) -> FitnessFn { .lock() .ok() .and_then(|data| data.on_generation.get(&generation).map(|(footprint, _)| footprint.clone())) - .map(|footprint| { + .map(|footprint| -> FitnessFn { Arc::new(move |input: &[Float]| { if let &[from, to] = input { footprint.get(from as usize, to as usize) as Float @@ -19,7 +19,7 @@ pub fn get_population_fitness_fn(generation: usize) -> FitnessFn { } }) }) - .expect("cannot get data from EXPERIMENT_DATA") + .unwrap_or_else(|| Arc::new(|_: &[Float]| 0.0) as FitnessFn) } /// Returns a description of population state. 
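// [editor's note — illustrative sketch, not part of the patch] The `take_while` guard added
// to the "heuristic:" section parser earlier in this patch is what keeps the fold from
// running past the end of its CSV section when raw telemetry output carries trailing data.
// A minimal standalone version of that section-scoped iteration (the function name is
// hypothetical):
fn heuristic_rows(data: &str) -> impl Iterator<Item = &str> + '_ {
    data.lines()
        .skip_while(|line| *line != "heuristic:") // jump to the section header
        .skip(2) // skip the header line and the CSV column-name line
        .take_while(|line| !line.is_empty()) // stop at the blank line ending the section
}
// Without the final `take_while`, lines belonging to whatever follows the section would be
// fed into the per-row field parsing, where `fields[i].parse().unwrap()` would panic.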
@@ -32,5 +32,5 @@ pub fn get_population_desc(generation: usize) -> String { format!("total [{}], known edges [{}]", individuals.len(), footprint.desc()) }) }) - .expect("cannot get data from EXPERIMENT_DATA") + .unwrap_or_else(|| String::from("no data")) } diff --git a/experiments/heuristic-research/www/index.js b/experiments/heuristic-research/www/index.js index a1dcab44f..be706b17b 100644 --- a/experiments/heuristic-research/www/index.js +++ b/experiments/heuristic-research/www/index.js @@ -325,6 +325,15 @@ function updateDynamicPlots(run) { case 'vrp': { if (run) { let format_type = vrpFormat.selectedOptions[0].value; + + // Check if data has been loaded + if (!Chart.data) { + status.innerText = '⚠ Please load a VRP file first'; + status.classList.remove('success'); + status.classList.add('loading'); + return; + } + if (format_type === "state") { max_gen = Chart.load_state(Chart.data); } else { diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index f3a593f06..9b1bfadf6 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -642,23 +642,82 @@ where return Ok(()); } + // To avoid WASM memory issues, downsample telemetry while preserving analysis capability + // Strategy: keep early samples, sample periodically, and keep recent samples + const MAX_SAMPLES: usize = 5000; + const EARLY_SAMPLES: usize = 500; + const RECENT_SAMPLES: usize = 500; + f.write_fmt(format_args!("TELEMETRY\n"))?; f.write_fmt(format_args!("search:\n"))?; f.write_fmt(format_args!("name,generation,reward,from,to,duration\n"))?; - for (generation, sample) in self.agent.tracker.search_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{}\n", - sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration - ))?; + + let search_total = self.agent.tracker.search_telemetry.len(); + if search_total <= MAX_SAMPLES { + // Small enough, output all + for (generation, sample) in self.agent.tracker.search_telemetry.iter() { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, generation, sample.reward, sample.transition.0, sample.transition.1, sample.duration + ))?; + } + } else { + // Downsample: early + periodic middle + recent + let middle_samples = MAX_SAMPLES - EARLY_SAMPLES - RECENT_SAMPLES; + let middle_start = EARLY_SAMPLES; + let middle_end = search_total - RECENT_SAMPLES; + let step = (middle_end - middle_start) / middle_samples; + + for (i, (generation, sample)) in self.agent.tracker.search_telemetry.iter().enumerate() { + let include = i < EARLY_SAMPLES + || i >= search_total - RECENT_SAMPLES + || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + + if include { + f.write_fmt(format_args!( + "{},{},{},{},{},{}\n", + sample.name, + generation, + sample.reward, + sample.transition.0, + sample.transition.1, + sample.duration + ))?; + } + } } f.write_fmt(format_args!("heuristic:\n"))?; f.write_fmt(format_args!("generation,state,name,alpha,beta,mu,v,n\n"))?; - for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { - f.write_fmt(format_args!( - "{},{},{},{},{},{},{},{}\n", - generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n - ))?; + + let heuristic_total = self.agent.tracker.heuristic_telemetry.len(); + if heuristic_total <= MAX_SAMPLES { + // Small enough, output all + for (generation, sample) in self.agent.tracker.heuristic_telemetry.iter() { + f.write_fmt(format_args!( + 
"{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + } else { + // Downsample: early + periodic middle + recent + let middle_samples = MAX_SAMPLES - EARLY_SAMPLES - RECENT_SAMPLES; + let middle_start = EARLY_SAMPLES; + let middle_end = heuristic_total - RECENT_SAMPLES; + let step = (middle_end - middle_start) / middle_samples; + + for (i, (generation, sample)) in self.agent.tracker.heuristic_telemetry.iter().enumerate() { + let include = i < EARLY_SAMPLES + || i >= heuristic_total - RECENT_SAMPLES + || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + + if include { + f.write_fmt(format_args!( + "{},{},{},{},{},{},{},{}\n", + generation, sample.state, sample.name, sample.alpha, sample.beta, sample.mu, sample.v, sample.n + ))?; + } + } } Ok(()) From 725aa17519f94af300dfb408bff2023f88c5a790 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 20:49:08 +0100 Subject: [PATCH 21/30] Improve a bit code --- experiments/heuristic-research/src/plots/mod.rs | 9 ++------- .../heuristic-research/src/solver/proxies.rs | 14 ++++++++------ vrp-core/src/solver/heuristic.rs | 8 ++++---- vrp-core/src/solver/search/decompose_search.rs | 2 +- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/experiments/heuristic-research/src/plots/mod.rs b/experiments/heuristic-research/src/plots/mod.rs index 33c35223b..b24cb5b09 100644 --- a/experiments/heuristic-research/src/plots/mod.rs +++ b/experiments/heuristic-research/src/plots/mod.rs @@ -313,14 +313,9 @@ fn find_nearest_generation(generations: &HashMap>, requ if generations.contains_key(&requested) { return requested; } - + // Find the largest generation that is smaller than requested - generations - .keys() - .filter(|&&g| g <= requested) - .copied() - .max() - .unwrap_or(requested) + generations.keys().filter(|&&g| g <= requested).copied().max().unwrap_or(requested) } fn get_search_statistics( diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index 197c2d3fd..abc17a6f6 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -46,25 +46,27 @@ impl<'a> TryFrom<&'a str> for ExperimentData { if let Some(telemetry_start) = value.find("TELEMETRY") { // Extract everything from TELEMETRY onwards let telemetry_content = &value[telemetry_start..]; - + // Parse telemetry CSV using existing parser let heuristic_state = HyperHeuristicState::try_parse_all(telemetry_content) .ok_or_else(|| "Failed to parse telemetry data".to_string())?; - + // Find max generation from telemetry data - let max_generation = heuristic_state.search_states.keys() + let max_generation = heuristic_state + .search_states + .keys() .chain(heuristic_state.heuristic_states.keys()) .copied() .max() .unwrap_or(0); - + let mut experiment_data = ExperimentData::default(); experiment_data.heuristic_state = heuristic_state; experiment_data.generation = max_generation; - + return Ok(experiment_data); } - + // Try parsing as JSON serde_json::from_str(value).map_err(|err| format!("cannot deserialize experiment data: {err}")) } diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index cc8c544f5..426286b47 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -574,22 +574,22 @@ mod dynamic { "local_reschedule_departure".to_string(), 1., ), - 
(Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 5.), + (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 3.), ( Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new( environment.random.clone(), SINGLE_HEURISTIC_QUOTA_LIMIT, )))), "local_swap_star".to_string(), - 10., + 5., ), // decompose search methods with different inner heuristic ( create_variable_search_decompose_search(problem.clone(), environment.clone()), "decompose_search_var".to_string(), - 25., + 10., ), - (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 25.), + (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 10.), ] } diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 6855ac16d..7aa6e68e0 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -118,7 +118,7 @@ fn create_multiple_insertion_contexts( let route_groups = group_routes_by_proximity(insertion_ctx); let (min, max) = max_routes_range; - let max = if insertion_ctx.solution.routes.len() < 4 { 2 } else { max }; + let max = if insertion_ctx.solution.routes.len() < max as usize { (max / 2).max(min) } else { max }; // identify route groups and create contexts from them let used_indices = RefCell::new(HashSet::new()); From d9446e8511121596a1a5f73eb65584e279ffad2e Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 20:55:09 +0100 Subject: [PATCH 22/30] Apply some refactorings and fix clippy warning --- .../heuristic-research/src/solver/proxies.rs | 18 +++++++-------- rosomaxa/src/algorithms/gsom/contraction.rs | 2 +- rosomaxa/src/algorithms/gsom/network.rs | 9 ++++++-- rosomaxa/src/algorithms/gsom/state.rs | 2 +- rosomaxa/src/hyper/dynamic_selective.rs | 4 ++-- rosomaxa/src/population/elitism.rs | 9 +++++++- rosomaxa/src/population/greedy.rs | 9 +++++++- rosomaxa/src/population/mod.rs | 9 ++++++-- rosomaxa/src/population/rosomaxa.rs | 23 ++++++++++++++----- .../unit/algorithms/gsom/network_test.rs | 6 ++--- rosomaxa/tests/unit/population/greedy_test.rs | 2 +- 11 files changed, 64 insertions(+), 29 deletions(-) diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index abc17a6f6..9601b958a 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -52,7 +52,7 @@ impl<'a> TryFrom<&'a str> for ExperimentData { .ok_or_else(|| "Failed to parse telemetry data".to_string())?; // Find max generation from telemetry data - let max_generation = heuristic_state + let generation = heuristic_state .search_states .keys() .chain(heuristic_state.heuristic_states.keys()) @@ -60,11 +60,7 @@ impl<'a> TryFrom<&'a str> for ExperimentData { .max() .unwrap_or(0); - let mut experiment_data = ExperimentData::default(); - experiment_data.heuristic_state = heuristic_state; - experiment_data.generation = max_generation; - - return Ok(experiment_data); + return Ok(ExperimentData { heuristic_state, generation, ..Default::default() }); } // Try parsing as JSON @@ -156,7 +152,7 @@ where self.generation = statistics.generation; self.acquire().generation = statistics.generation; - let individuals_data = self.inner.all().map(|individual| individual.into()).collect::>(); + let individuals_data = self.inner.iter().map(|individual| individual.into()).collect::>(); // NOTE this is 
not exactly how footprint is calculated in production code. // In the production version, approximation of the footprint is used to avoid iterating over // all individuals in the population on generation update. @@ -191,8 +187,12 @@ where self.inner.ranked() } - fn all(&self) -> Box + '_> { - self.inner.all() + fn iter(&self) -> Box + '_> { + self.inner.iter() + } + + fn into_iter(self) -> Box> { + self.inner.into_iter() } fn size(&self) -> usize { diff --git a/rosomaxa/src/algorithms/gsom/contraction.rs b/rosomaxa/src/algorithms/gsom/contraction.rs index ab3c2133f..e94884310 100644 --- a/rosomaxa/src/algorithms/gsom/contraction.rs +++ b/rosomaxa/src/algorithms/gsom/contraction.rs @@ -27,7 +27,7 @@ where // find nodes which should be removed let removed = network - .get_nodes() + .iter_nodes() .map(|node| node.coordinate) .filter(|coord| coord.0 % x_decim == 0 || coord.1 % y_decim == 0) .collect::>(); diff --git a/rosomaxa/src/algorithms/gsom/network.rs b/rosomaxa/src/algorithms/gsom/network.rs index 79b63d774..11bb4ce7d 100644 --- a/rosomaxa/src/algorithms/gsom/network.rs +++ b/rosomaxa/src/algorithms/gsom/network.rs @@ -210,7 +210,7 @@ where } /// Return nodes in arbitrary order. - pub fn get_nodes(&self) -> impl Iterator> + '_ { + pub fn iter_nodes(&self) -> impl Iterator> + '_ { self.nodes.values() } @@ -219,6 +219,11 @@ where self.nodes.iter() } + /// Iterates over coordinates and their nodes. + pub fn into_iter(self) -> impl Iterator)> { + self.nodes.into_iter() + } + /// Returns a total amount of nodes. pub fn size(&self) -> usize { self.nodes.len() @@ -242,7 +247,7 @@ where /// Returns max unified distance of the network. pub fn max_unified_distance(&self) -> Float { - self.get_nodes().map(|node| node.unified_distance(self, 1)).max_by(|a, b| a.total_cmp(b)).unwrap_or_default() + self.iter_nodes().map(|node| node.unified_distance(self, 1)).max_by(|a, b| a.total_cmp(b)).unwrap_or_default() } /// Performs training loop multiple times. 
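// [editor's note — illustrative sketch, not part of the patch] The refactoring in this patch
// applies one convention across the GSOM network and the population types: `iter` borrows,
// while a separate consuming variant hands out owned items, so callers can drain a
// population without cloning individuals. Simplified stand-in types:
struct Population<S> {
    individuals: Vec<S>,
}

impl<S> Population<S> {
    /// Borrowing traversal in arbitrary order.
    fn iter(&self) -> impl Iterator<Item = &S> + '_ {
        self.individuals.iter()
    }

    /// Consuming traversal: transfers ownership of each individual to the caller.
    fn into_iter(self) -> impl Iterator<Item = S> {
        self.individuals.into_iter()
    }
}
// A later patch in this series switches the trait version of the consuming method to a
// `self: Box<Self>` receiver so it remains callable on boxed `dyn HeuristicPopulation`
// objects.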
diff --git a/rosomaxa/src/algorithms/gsom/state.rs b/rosomaxa/src/algorithms/gsom/state.rs index 9e7414efa..415cd7928 100644 --- a/rosomaxa/src/algorithms/gsom/state.rs +++ b/rosomaxa/src/algorithms/gsom/state.rs @@ -48,7 +48,7 @@ where let mse = network.mse(); let nodes = network - .get_nodes() + .iter_nodes() .map(|node| { let mut dump = String::new(); write!(dump, "{}", node.storage).unwrap(); diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 9b1bfadf6..e821555d9 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -671,7 +671,7 @@ where for (i, (generation, sample)) in self.agent.tracker.search_telemetry.iter().enumerate() { let include = i < EARLY_SAMPLES || i >= search_total - RECENT_SAMPLES - || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + || (i >= middle_start && i < middle_end && (i - middle_start).is_multiple_of(step)); if include { f.write_fmt(format_args!( @@ -709,7 +709,7 @@ where for (i, (generation, sample)) in self.agent.tracker.heuristic_telemetry.iter().enumerate() { let include = i < EARLY_SAMPLES || i >= heuristic_total - RECENT_SAMPLES - || (i >= middle_start && i < middle_end && (i - middle_start) % step == 0); + || (i >= middle_start && i < middle_end && (i - middle_start).is_multiple_of(step)); if include { f.write_fmt(format_args!( diff --git a/rosomaxa/src/population/elitism.rs b/rosomaxa/src/population/elitism.rs index 70bfdeca3..f94de862c 100644 --- a/rosomaxa/src/population/elitism.rs +++ b/rosomaxa/src/population/elitism.rs @@ -92,10 +92,17 @@ where Box::new(self.individuals.iter()) } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { Box::new(self.individuals.iter()) } + fn into_iter(self) -> Box> + where + Self::Individual: 'static, + { + Box::new(self.individuals.into_iter()) + } + fn size(&self) -> usize { self.individuals.len() } diff --git a/rosomaxa/src/population/greedy.rs b/rosomaxa/src/population/greedy.rs index cf911eb83..a4f78b754 100644 --- a/rosomaxa/src/population/greedy.rs +++ b/rosomaxa/src/population/greedy.rs @@ -62,10 +62,17 @@ where Box::new(self.best_known.iter()) } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { Box::new(self.best_known.iter()) } + fn into_iter(self) -> Box> + where + Self::Individual: 'static, + { + Box::new(self.best_known.into_iter()) + } + fn size(&self) -> usize { usize::from(self.best_known.is_some()) } diff --git a/rosomaxa/src/population/mod.rs b/rosomaxa/src/population/mod.rs index b9183b986..31936cb5c 100644 --- a/rosomaxa/src/population/mod.rs +++ b/rosomaxa/src/population/mod.rs @@ -52,8 +52,13 @@ pub trait HeuristicPopulation: Send + Sync { /// Returns subset of individuals within their rank sorted according their quality. fn ranked(&self) -> Box + '_>; - /// Returns all individuals in arbitrary order. - fn all(&self) -> Box + '_>; + /// Returns iterator over all individuals in arbitrary order. + fn iter(&self) -> Box + '_>; + + /// Consumes population and returns an iterator over all individuals in arbitrary order. + fn into_iter(self) -> Box> + where + Self::Individual: 'static; /// Returns population size. 
fn size(&self) -> usize; diff --git a/rosomaxa/src/population/rosomaxa.rs b/rosomaxa/src/population/rosomaxa.rs index 245af97e2..1a7a8a30a 100644 --- a/rosomaxa/src/population/rosomaxa.rs +++ b/rosomaxa/src/population/rosomaxa.rs @@ -89,9 +89,9 @@ where impl HeuristicPopulation for Rosomaxa where - C: RosomaxaContext, - O: HeuristicObjective + Alternative, - S: RosomaxaSolution, + C: RosomaxaContext + 'static, + O: HeuristicObjective + Alternative + 'static, + S: RosomaxaSolution + 'static, { type Objective = O; type Individual = S; @@ -177,12 +177,23 @@ where self.elite.ranked() } - fn all(&self) -> Box + '_> { + fn iter(&self) -> Box + '_> { match &self.phase { RosomaxaPhases::Exploration { network, .. } => { - Box::new(self.elite.all().chain(network.get_nodes().flat_map(|node| node.storage.population.all()))) + Box::new(self.elite.iter().chain(network.iter_nodes().flat_map(|node| node.storage.population.iter()))) } - _ => self.elite.all(), + _ => self.elite.iter(), + } + } + + fn into_iter(self) -> Box> { + match self.phase { + RosomaxaPhases::Exploration { network, .. } => Box::new( + self.elite + .into_iter() + .chain(network.into_iter().flat_map(|(_, node)| node.storage.population.into_iter())), + ), + _ => self.elite.into_iter(), } } diff --git a/rosomaxa/tests/unit/algorithms/gsom/network_test.rs b/rosomaxa/tests/unit/algorithms/gsom/network_test.rs index 6d99200db..f1b64ca9a 100644 --- a/rosomaxa/tests/unit/algorithms/gsom/network_test.rs +++ b/rosomaxa/tests/unit/algorithms/gsom/network_test.rs @@ -153,7 +153,7 @@ fn can_create_network() { assert_eq!(count_data_stored(&network.nodes), initial_data.len()); // Check node properties - for node in network.get_nodes() { + for node in network.iter_nodes() { assert_eq!(node.weights.len(), 3); assert!(node.error >= 0.); } @@ -294,7 +294,7 @@ fn can_create_network_with_spiral_distribution() { assert!(non_empty_nodes >= (size / 10), "too sparse {non_empty_nodes}"); assert!(network.size() <= (size * 2), "too big {}", network.size()); let distances: Vec<_> = network - .get_nodes() + .iter_nodes() .flat_map(|node| node.storage.data.iter().map(|data| euclidian_distance(&node.weights, data.weights()))) .collect(); let avg_distance = distances.iter().sum::() / distances.len() as Float; @@ -466,7 +466,7 @@ fn can_create_new_network_empty_regions() { let network = NetworkType::new(&(), initial_data, config, random, |_| DataStorageFactory).unwrap(); - let nodes_vec: Vec<_> = network.get_nodes().collect(); + let nodes_vec: Vec<_> = network.iter_nodes().collect(); let total_pairs = (nodes_vec.len() * (nodes_vec.len() - 1)) / 2; let failed_pairs = (0..nodes_vec.len()) .flat_map(|i| { diff --git a/rosomaxa/tests/unit/population/greedy_test.rs b/rosomaxa/tests/unit/population/greedy_test.rs index 0053849c0..aa90b17e6 100644 --- a/rosomaxa/tests/unit/population/greedy_test.rs +++ b/rosomaxa/tests/unit/population/greedy_test.rs @@ -51,7 +51,7 @@ fn can_select_when_empty() { let population = Greedy::<_, _>::new(objective, 1, None); assert_eq!(population.select().count(), 0); - assert_eq!(population.all().count(), 0); + assert_eq!(population.iter().count(), 0); } #[test] From ba470dc8d747a65ca32f7bbd8ea81f3c7b2af12c Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 23:16:53 +0100 Subject: [PATCH 23/30] Add into iterators for getting individuals from context --- .../heuristic-research/src/solver/proxies.rs | 4 ++-- rosomaxa/src/algorithms/gsom/network.rs | 2 +- rosomaxa/src/lib.rs | 7 ++++++- rosomaxa/src/population/elitism.rs | 2 +- 
rosomaxa/src/population/greedy.rs | 2 +- rosomaxa/src/population/mod.rs | 2 +- rosomaxa/src/population/rosomaxa.rs | 14 +++++++------- vrp-core/src/solver/mod.rs | 5 +++++ 8 files changed, 24 insertions(+), 14 deletions(-) diff --git a/experiments/heuristic-research/src/solver/proxies.rs b/experiments/heuristic-research/src/solver/proxies.rs index 9601b958a..ec1b56e3b 100644 --- a/experiments/heuristic-research/src/solver/proxies.rs +++ b/experiments/heuristic-research/src/solver/proxies.rs @@ -191,8 +191,8 @@ where self.inner.iter() } - fn into_iter(self) -> Box> { - self.inner.into_iter() + fn into_iter(self: Box) -> Box> { + Box::new(self.inner).into_iter() } fn size(&self) -> usize { diff --git a/rosomaxa/src/algorithms/gsom/network.rs b/rosomaxa/src/algorithms/gsom/network.rs index 11bb4ce7d..9d43e9f34 100644 --- a/rosomaxa/src/algorithms/gsom/network.rs +++ b/rosomaxa/src/algorithms/gsom/network.rs @@ -220,7 +220,7 @@ where } /// Iterates over coordinates and their nodes. - pub fn into_iter(self) -> impl Iterator)> { + pub fn into_iter_nodes(self) -> impl Iterator)> { self.nodes.into_iter() } diff --git a/rosomaxa/src/lib.rs b/rosomaxa/src/lib.rs index 07cb400c9..859296933 100644 --- a/rosomaxa/src/lib.rs +++ b/rosomaxa/src/lib.rs @@ -163,7 +163,7 @@ where impl TelemetryHeuristicContext where O: HeuristicObjective, - S: HeuristicSolution, + S: HeuristicSolution + 'static, { /// Creates a new instance of `TelemetryHeuristicContext`. pub fn new( @@ -176,6 +176,11 @@ where Self { objective, population, telemetry, environment } } + /// Consumes context and returns all individuals. + pub fn into_individuals(self) -> Box> { + self.population.into_iter() + } + /// Adds solution to population. pub fn add_solution(&mut self, solution: S) { self.population.add(solution); diff --git a/rosomaxa/src/population/elitism.rs b/rosomaxa/src/population/elitism.rs index f94de862c..7b8b8c116 100644 --- a/rosomaxa/src/population/elitism.rs +++ b/rosomaxa/src/population/elitism.rs @@ -96,7 +96,7 @@ where Box::new(self.individuals.iter()) } - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static, { diff --git a/rosomaxa/src/population/greedy.rs b/rosomaxa/src/population/greedy.rs index a4f78b754..30e899d7d 100644 --- a/rosomaxa/src/population/greedy.rs +++ b/rosomaxa/src/population/greedy.rs @@ -66,7 +66,7 @@ where Box::new(self.best_known.iter()) } - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static, { diff --git a/rosomaxa/src/population/mod.rs b/rosomaxa/src/population/mod.rs index 31936cb5c..2b82a1627 100644 --- a/rosomaxa/src/population/mod.rs +++ b/rosomaxa/src/population/mod.rs @@ -56,7 +56,7 @@ pub trait HeuristicPopulation: Send + Sync { fn iter(&self) -> Box + '_>; /// Consumes population and returns an iterator over all individuals in arbitrary order. - fn into_iter(self) -> Box> + fn into_iter(self: Box) -> Box> where Self::Individual: 'static; diff --git a/rosomaxa/src/population/rosomaxa.rs b/rosomaxa/src/population/rosomaxa.rs index 1a7a8a30a..102eccd8a 100644 --- a/rosomaxa/src/population/rosomaxa.rs +++ b/rosomaxa/src/population/rosomaxa.rs @@ -186,14 +186,14 @@ where } } - fn into_iter(self) -> Box> { + fn into_iter(self: Box) -> Box> { match self.phase { - RosomaxaPhases::Exploration { network, .. } => Box::new( - self.elite - .into_iter() - .chain(network.into_iter().flat_map(|(_, node)| node.storage.population.into_iter())), - ), - _ => self.elite.into_iter(), + RosomaxaPhases::Exploration { network, .. 
} => { + Box::new(Box::new(self.elite).into_iter().chain( + network.into_iter_nodes().flat_map(|(_, node)| Box::new(node.storage.population).into_iter()), + )) + } + _ => Box::new(self.elite).into_iter(), } } diff --git a/vrp-core/src/solver/mod.rs b/vrp-core/src/solver/mod.rs index 7dd90c513..53a4e3796 100644 --- a/vrp-core/src/solver/mod.rs +++ b/vrp-core/src/solver/mod.rs @@ -124,6 +124,11 @@ impl RefinementContext { Self { problem, environment, inner_context, state: Default::default(), initial_footprint } } + /// Consumes context and returns all individuals. + pub fn into_individuals(self) -> Box> { + self.inner_context.into_individuals() + } + /// Adds solution to population. pub fn add_solution(&mut self, solution: InsertionContext) { self.inner_context.add_solution(solution); From 01a7516d7054ab8d15dbf0843053b8cd76838f60 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 13 Jan 2026 23:49:19 +0100 Subject: [PATCH 24/30] Modify decompose search to diversify solution if no improvement --- .../src/solver/search/decompose_search.rs | 41 ++++++++++++------- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 7aa6e68e0..70b100177 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -90,11 +90,25 @@ impl DecomposeSearch { (refinement_ctx, route_indices) }); - // merge evolution results into one insertion_ctx - let mut insertion_ctx = decomposed.into_iter().fold( - InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), - |insertion_ctx, decomposed| merge_best(decomposed, original_insertion_ctx, insertion_ctx), - ); + // get new and old parts and detect if there was any improvement in any part + let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = + decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed, original_insertion_ctx)).unzip(); + + let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); + + let mut insertion_ctx = if has_improvements { + improvements.into_iter().zip(new_parts.into_iter().zip(old_parts.into_iter())).fold( + InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), + |accumulated, (is_improvement, (new_part, old_part))| { + merge_parts(if is_improvement { new_part } else { old_part }, accumulated) + }, + ) + } else { + new_parts.into_iter().fold( + InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), + |accumulated, new_part| merge_parts(new_part, accumulated), + ) + }; insertion_ctx.restore(); finalize_insertion_ctx(&mut insertion_ctx); @@ -248,24 +262,23 @@ fn decompose_insertion_context( .and_then(|contexts| if contexts.len() > 1 { Some(contexts) } else { None }) } -fn merge_best( +fn get_solution_parts( decomposed: (RefinementContext, HashSet), original_insertion_ctx: &InsertionContext, - accumulated: InsertionContext, -) -> InsertionContext { +) -> ((SolutionContext, SolutionContext), bool) { let (decomposed_ctx, route_indices) = decomposed; - let decomposed_insertion_ctx = decomposed_ctx.ranked().next().expect(GREEDY_ERROR); + let decomposed_insertion_ctx = decomposed_ctx.into_individuals().next().expect(GREEDY_ERROR); let environment = original_insertion_ctx.environment.clone(); let (partial_insertion_ctx, _) = create_partial_insertion_ctx(original_insertion_ctx, environment, route_indices); let 
goal = partial_insertion_ctx.problem.goal.as_ref(); - let source_solution = if goal.total_order(decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less { - &decomposed_insertion_ctx.solution - } else { - &partial_insertion_ctx.solution - }; + let is_improvement = goal.total_order(&decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less; + + ((decomposed_insertion_ctx.solution, partial_insertion_ctx.solution), is_improvement) +} +fn merge_parts(source_solution: SolutionContext, accumulated: InsertionContext) -> InsertionContext { let mut accumulated = accumulated; let dest_solution = &mut accumulated.solution; From 4ba697b154f4065817f1f680a59b6bd3d79bb468 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 00:15:41 +0100 Subject: [PATCH 25/30] Fix clippy warning --- vrp-core/src/solver/search/decompose_search.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 70b100177..a16b29633 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -97,7 +97,7 @@ impl DecomposeSearch { let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); let mut insertion_ctx = if has_improvements { - improvements.into_iter().zip(new_parts.into_iter().zip(old_parts.into_iter())).fold( + improvements.into_iter().zip(new_parts.into_iter().zip(old_parts)).fold( InsertionContext::new_empty(refinement_ctx.problem.clone(), refinement_ctx.environment.clone()), |accumulated, (is_improvement, (new_part, old_part))| { merge_parts(if is_improvement { new_part } else { old_part }, accumulated) From baec260189840464b04ea452fac8e69bb76c0123 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 19:59:44 +0100 Subject: [PATCH 26/30] Add some improvements on heuristics --- rosomaxa/src/hyper/dynamic_selective.rs | 47 ++-- rosomaxa/src/utils/random.rs | 46 ++++ vrp-cli/src/extensions/solve/config.rs | 4 +- vrp-core/src/solver/heuristic.rs | 77 ++++--- .../src/solver/search/decompose_search.rs | 213 ++++++++++++++---- .../solver/search/local/exchange_swap_star.rs | 12 +- .../solver/search/decompose_search_test.rs | 2 +- .../search/local/exchange_swap_star_test.rs | 4 +- 8 files changed, 289 insertions(+), 116 deletions(-) diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index e821555d9..0ebe11296 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -6,7 +6,7 @@ use super::*; use crate::Timer; use crate::algorithms::math::RemedianUsize; use crate::algorithms::rl::{SlotAction, SlotFeedback, SlotMachine}; -use crate::utils::{DefaultDistributionSampler, random_argmax}; +use crate::utils::{DefaultDistributionSampler, select_softmax_index}; use std::cmp::Ordering; use std::collections::HashMap; use std::fmt::Formatter; @@ -123,26 +123,31 @@ impl SearchRewards { /// Multiplier applied when a solution improves the Global Best Known. /// This is the "Jackpot" factor. - pub const GLOBAL_BEST_MULTIPLIER: Float = 2.5; + pub const GLOBAL_BEST_MULTIPLIER: Float = 2.0; /// Multiplier applied when a solution is diverse but not a global improvement. /// Keeps "Diverse" operators alive but with low signal. - pub const DIVERSE_MULTIPLIER: Float = 0.05; + pub const DIVERSE_MULTIPLIER: Float = 0.01; /// The maximum percentage (+/-) that execution duration can affect the reward. 
/// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. pub const PERF_TOLERANCE: Float = 0.2; + /// Softmax/Boltzmann policy temperature range (over Thompson samples). + /// Higher temperature => more exploration. + pub const SOFTMAX_TEMPERATURE_MIN: Float = 0.3; + pub const SOFTMAX_TEMPERATURE_MAX: Float = 2.0; + /// The "Cheap Failure" /// The penalty applied when an operator produces no improvement. /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. - pub const PENALTY_MIN: Float = -0.01; + pub const PENALTY_MIN: Float = -0.5; /// The "Expensive Failure" /// The penalty applied when an operator produces a negative outcome. /// A larger negative value ensures that failures are strongly discouraged. - pub const PENALTY_MAX: Float = -0.1; + pub const PENALTY_MAX: Float = -2.0; /// Calculates the ratio between the "Jackpot" and the "Normal" signal. /// Used by the Agent to determine how many times larger than the average @@ -157,6 +162,14 @@ impl SearchRewards { max_theoretical / typical_reward } + + /// Derives a temperature based on the search progress. + /// When the search plateaus, we increase exploration. + pub fn softmax_temperature(improvement_1000_ratio: Float) -> Float { + // In other places we treat 10% improvement as "fast flow". + let flow = (improvement_1000_ratio * 10.0).clamp(0.0, 1.0); + Self::SOFTMAX_TEMPERATURE_MIN + (1.0 - flow) * (Self::SOFTMAX_TEMPERATURE_MAX - Self::SOFTMAX_TEMPERATURE_MIN) + } } /// Search state for Thompson sampling. @@ -334,14 +347,11 @@ where }; // Get contextually appropriate slot machines. - let (slot_idx, slot_machine) = self - .slot_machines - .get(&from) - .and_then(|slots| { - random_argmax(slots.iter().map(|(slot, _)| slot.sample()), self.random.as_ref()) - .and_then(|slot_idx| slots.get(slot_idx).map(|(slot, _)| (slot_idx, slot))) - }) - .expect("cannot get slot machine"); + let slots = self.slot_machines.get(&from).expect("cannot get slot machines"); + let temperature = SearchRewards::softmax_temperature(heuristic_ctx.statistics().improvement_1000_ratio); + let samples = slots.iter().map(|(slot, _)| slot.sample()).collect::>(); + let slot_idx = select_softmax_index(&samples, temperature, self.random.as_ref()); + let slot_machine = &slots[slot_idx].0; let approx_median = self.tracker.approx_median(); @@ -430,15 +440,16 @@ where let distance_initial = get_relative_distance(objective, new_solution, initial_solution); let distance_best = get_relative_distance(objective, new_solution, best_known); - // Reward components (max ~4.0 for local, ~10.0 for global). 
let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; - let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) - * SearchRewards::BASE_REWARD - * SearchRewards::GLOBAL_BEST_MULTIPLIER; match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { // Global Jackpot - (Ordering::Greater, Ordering::Greater) => Some(reward_initial + reward_best), + (Ordering::Greater, Ordering::Greater) => { + let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) + * SearchRewards::BASE_REWARD + * SearchRewards::GLOBAL_BEST_MULTIPLIER; + Some(reward_initial + reward_best) + } // Local/Diverse Improvement (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), diff --git a/rosomaxa/src/utils/random.rs b/rosomaxa/src/utils/random.rs index 4bbeb1909..bb51da416 100644 --- a/rosomaxa/src/utils/random.rs +++ b/rosomaxa/src/utils/random.rs @@ -219,3 +219,49 @@ where }) .map(|(idx, _)| idx) } + +/// Selects an index from samples using softmax (Boltzmann) distribution with given temperature. +pub fn select_softmax_index(samples: &[Float], temperature: Float, random: &dyn Random) -> usize { + assert!(!samples.is_empty()); + + // If we ever see +inf, treat it as a hard-max tie among +inf arms. + if samples.iter().any(|&s| s.is_infinite() && s.is_sign_positive()) { + let idxs = samples + .iter() + .enumerate() + .filter(|(_, s)| s.is_infinite() && s.is_sign_positive()) + .map(|(idx, _)| idx) + .collect::>(); + let pick = random.uniform_int(0, (idxs.len() - 1) as i32) as usize; + return idxs[pick]; + } + + let temperature = temperature.max(f64::EPSILON); + let max_sample = samples.iter().copied().filter(|s| s.is_finite()).max_by(|a, b| a.total_cmp(b)).unwrap_or(0.0); + + // Stable softmax: w_i = exp((s_i - max_s) / T). Since (s_i - max_s) <= 0, + // weights are in (0, 1] and never overflow. + let mut weights = Vec::with_capacity(samples.len()); + let mut sum = 0.0; + for &s in samples { + let w = if s.is_finite() { ((s - max_sample) / temperature).exp() } else { 0.0 }; + sum += w; + weights.push(w); + } + + if sum <= f64::EPSILON { + // Degenerate case: fall back to argmax tie-break. + return random_argmax(samples.iter().copied(), random).unwrap_or(0); + } + + let mut r = random.uniform_real(0.0, sum); + for (idx, w) in weights.iter().enumerate() { + r -= *w; + if r <= 0.0 { + return idx; + } + } + + // Numerical edge: return last. 
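+    // (reachable only when floating-point rounding leaves `r` marginally above zero after the loop)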
+ samples.len() - 1 +} diff --git a/vrp-cli/src/extensions/solve/config.rs b/vrp-cli/src/extensions/solve/config.rs index d31289390..133597b14 100644 --- a/vrp-cli/src/extensions/solve/config.rs +++ b/vrp-cli/src/extensions/solve/config.rs @@ -625,7 +625,7 @@ fn create_operator( let operator = create_default_heuristic_operator(problem, environment.clone()); ( - Arc::new(DecomposeSearch::new(operator, (routes.min, routes.max), *repeat, 200)), + Arc::new(DecomposeSearch::new(operator, (routes.min, routes.max), *repeat)), create_operator_probability(probability, environment.random.clone()), ) } @@ -699,7 +699,7 @@ fn create_local_search( let operators = inners .iter() .map::<(Arc, usize), _>(|op| match op { - LocalOperatorType::SwapStar { weight } => (Arc::new(ExchangeSwapStar::new(random.clone(), 200)), *weight), + LocalOperatorType::SwapStar { weight } => (Arc::new(ExchangeSwapStar::new(random.clone())), *weight), LocalOperatorType::InterRouteBest { weight, noise } => { (Arc::new(ExchangeInterRouteBest::new(noise.probability, noise.min, noise.max)), *weight) } diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index 426286b47..a0d59e807 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -134,7 +134,7 @@ pub fn get_static_heuristic( let heuristic_group: TargetHeuristicGroup = vec![ ( - Arc::new(DecomposeSearch::new(default_operator.clone(), (2, 4), 4, SINGLE_HEURISTIC_QUOTA_LIMIT)), + Arc::new(DecomposeSearch::new(default_operator.clone(), (2, 4), 4)), create_context_operator_probability( 300, 10, @@ -279,8 +279,6 @@ fn get_limits(problem: &Problem) -> (RemovalLimits, RemovalLimits) { (normal_limits, small_limits) } -const SINGLE_HEURISTIC_QUOTA_LIMIT: usize = 200; - pub use self::builder::create_default_init_operators; pub use self::builder::create_default_processing; pub use self::statik::create_default_heuristic_operator; @@ -475,7 +473,7 @@ mod statik { pub fn create_default_local_search(random: Arc) -> TargetSearchOperator { Arc::new(LocalSearch::new(Arc::new(CompositeLocalOperator::new( vec![ - (Arc::new(ExchangeSwapStar::new(random, SINGLE_HEURISTIC_QUOTA_LIMIT)), 200), + (Arc::new(ExchangeSwapStar::new(random)), 200), (Arc::new(ExchangeInterRouteBest::default()), 100), (Arc::new(ExchangeSequence::default()), 100), (Arc::new(ExchangeInterRouteRandom::default()), 30), @@ -494,8 +492,8 @@ mod dynamic { fn get_weighted_recreates(problem: &Problem, random: Arc) -> Vec<(Arc, String, Float)> { let cheapest: Arc = Arc::new(RecreateWithCheapest::new(random.clone())); vec![ - (cheapest.clone(), "cheapest".to_string(), 1.), - (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 3.), + (cheapest.clone(), "cheapest".to_string(), 2.), + (Arc::new(RecreateWithBlinks::new_with_defaults(random.clone())), "blinks".to_string(), 2.), (Arc::new(RecreateWithSkipBest::new(1, 2, random.clone())), "skip_best".to_string(), 1.), (Arc::new(RecreateWithRegret::new(1, 3, random.clone())), "regret".to_string(), 1.), (Arc::new(RecreateWithPerturbation::new_with_defaults(random.clone())), "perturbation".to_string(), 1.), @@ -537,19 +535,19 @@ mod dynamic { ( create_weighted(|limits| Arc::new(AdjustedStringRemoval::new_with_defaults(limits))), "asr".to_string(), - 7., + 2., ), - (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 5.), - (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 5.), - (create_weighted(|limits| 
Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 4.), - (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 4.), - (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 4.), - (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 2.), - (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 4.), + (create_weighted(|limits| Arc::new(NeighbourRemoval::new(limits))), "neighbour".to_string(), 1.), + (create_weighted(|limits| Arc::new(WorstRouteRemoval::new(limits))), "worst_route".to_string(), 1.), + (create_weighted(|limits| Arc::new(WorstJobRemoval::new(4, limits))), "worst_job".to_string(), 1.), + (create_weighted(|limits| Arc::new(CloseRouteRemoval::new(limits))), "close_route".to_string(), 1.), + (create_weighted(|limits| Arc::new(RandomJobRemoval::new(limits))), "random_job".to_string(), 1.), + (create_weighted(|limits| Arc::new(RandomRouteRemoval::new(limits))), "random_route".to_string(), 1.), + (Arc::new(ClusterRemoval::new_with_defaults(problem).unwrap()), "cluster".to_string(), 1.), ] } - fn get_mutations( + fn get_search_operators( problem: Arc, environment: Arc, ) -> Vec<(TargetSearchOperator, String, Float)> { @@ -574,22 +572,14 @@ mod dynamic { "local_reschedule_departure".to_string(), 1., ), - (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 3.), + (Arc::new(LKHSearch::new(LKHSearchMode::ImprovementOnly)), "lkh_strict".to_string(), 1.), ( - Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new( - environment.random.clone(), - SINGLE_HEURISTIC_QUOTA_LIMIT, - )))), + Arc::new(LocalSearch::new(Arc::new(ExchangeSwapStar::new(environment.random.clone())))), "local_swap_star".to_string(), - 5., - ), - // decompose search methods with different inner heuristic - ( - create_variable_search_decompose_search(problem.clone(), environment.clone()), - "decompose_search_var".to_string(), - 10., + 2., ), - (create_composite_decompose_search(problem, environment), "decompose_search_com".to_string(), 10.), + // combined decompose search with different inner heuristics + (create_combined_decompose_search(problem, environment), "decompose_search".to_string(), 2.), ] } @@ -621,22 +611,30 @@ mod dynamic { ( Arc::new(RuinAndRecreate::new(ruin.clone(), recreate.clone())), format!("{ruin_name}+{recreate_name}"), - ruin_weight * recreate_weight, + ruin_weight + recreate_weight, ) }) }) .collect::>(); - let mutations = get_mutations(problem.clone(), environment.clone()); + let operators = get_search_operators(problem.clone(), environment.clone()); let heuristic_filter = problem.extras.get_heuristic_filter(); ruin_recreate_ops .into_iter() - .chain(mutations) + .chain(operators) .filter(|(_, name, _)| heuristic_filter.as_ref().is_none_or(|filter| (filter)(name.as_str()))) .collect::>() } + /// Creates a default operator which is good for general use. + pub fn create_default_good_operator(problem: Arc, environment: Arc) -> TargetSearchOperator { + Arc::new(RuinAndRecreate::new( + Arc::new(AdjustedStringRemoval::new_with_defaults(get_limits(problem.as_ref()).0)), + Arc::new(RecreateWithBlinks::new_with_defaults(environment.random.clone())), + )) + } + /// Creates a default ruin-and-recreate operator for internal use (e.g., decompose search, infeasible search). /// Uses weighted ruins and recreates from the main operator building logic. 
pub fn create_default_inner_ruin_recreate( @@ -663,7 +661,7 @@ mod dynamic { pub fn create_default_local_search(random: Arc) -> Arc { Arc::new(LocalSearch::new(Arc::new(CompositeLocalOperator::new( vec![ - (Arc::new(ExchangeSwapStar::new(random, SINGLE_HEURISTIC_QUOTA_LIMIT / 4)), 2), + (Arc::new(ExchangeSwapStar::new(random)), 2), (Arc::new(ExchangeInterRouteBest::default()), 1), (Arc::new(ExchangeInterRouteRandom::default()), 1), (Arc::new(ExchangeIntraRouteRandom::default()), 1), @@ -682,13 +680,13 @@ mod dynamic { Arc::new(WeightedHeuristicOperator::new( vec![ create_default_inner_ruin_recreate(problem.clone(), environment.clone()), + create_default_good_operator(problem, environment.clone()), create_default_local_search(environment.random.clone()), ], - vec![10, 1], + vec![9, 3, 1], )), (2, 4), 2, - SINGLE_HEURISTIC_QUOTA_LIMIT, )) } @@ -707,7 +705,16 @@ mod dynamic { ])), (2, 4), 2, - SINGLE_HEURISTIC_QUOTA_LIMIT, + )) + } + + fn create_combined_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { + Arc::new(WeightedHeuristicOperator::new( + vec![ + create_variable_search_decompose_search(problem.clone(), environment.clone()), + create_composite_decompose_search(problem, environment), + ], + vec![1, 1], )) } } diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index a16b29633..139e319d9 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -8,7 +8,6 @@ use crate::solver::search::create_environment_with_custom_quota; use crate::solver::*; use crate::utils::Either; use rosomaxa::utils::parallel_into_collect; -use std::cell::RefCell; use std::cmp::Ordering; use std::collections::HashSet; use std::iter::{empty, once}; @@ -19,21 +18,15 @@ pub struct DecomposeSearch { inner_search: TargetSearchOperator, max_routes_range: (i32, i32), repeat_count: usize, - quota_limit: usize, } impl DecomposeSearch { /// Create a new instance of `DecomposeSearch`. 
- pub fn new( - inner_search: TargetSearchOperator, - max_routes_range: (usize, usize), - repeat_count: usize, - quota_limit: usize, - ) -> Self { + pub fn new(inner_search: TargetSearchOperator, max_routes_range: (usize, usize), repeat_count: usize) -> Self { assert!(max_routes_range.0 > 1); let max_routes_range = (max_routes_range.0 as i32, max_routes_range.1 as i32); - Self { inner_search, max_routes_range, repeat_count, quota_limit } + Self { inner_search, max_routes_range, repeat_count } } } @@ -46,15 +39,9 @@ impl HeuristicSearchOperator for DecomposeSearch { let refinement_ctx = heuristic_ctx; let insertion_ctx = solution; - decompose_insertion_context( - refinement_ctx, - insertion_ctx, - self.max_routes_range, - self.repeat_count, - self.quota_limit, - ) - .map(|contexts| self.refine_decomposed(refinement_ctx, insertion_ctx, contexts)) - .unwrap_or_else(|| self.inner_search.search(heuristic_ctx, insertion_ctx)) + decompose_insertion_context(refinement_ctx, insertion_ctx, self.max_routes_range, self.repeat_count) + .map(|contexts| self.refine_decomposed(refinement_ctx, contexts)) + .unwrap_or_else(|| self.inner_search.search(heuristic_ctx, insertion_ctx)) } } @@ -64,21 +51,23 @@ impl DecomposeSearch { fn refine_decomposed( &self, refinement_ctx: &RefinementContext, - original_insertion_ctx: &InsertionContext, decomposed: Vec<(RefinementContext, HashSet)>, ) -> InsertionContext { - // NOTE: validate decomposition + // NOTE: validate decomposition in debug builds only + #[cfg(debug_assertions)] decomposed.iter().enumerate().for_each(|(outer_ix, (_, outer))| { decomposed.iter().enumerate().filter(|(inner_idx, _)| outer_ix != *inner_idx).for_each( |(_, (_, inner))| { - assert!(outer.intersection(inner).next().is_none()); + debug_assert!(outer.intersection(inner).next().is_none()); }, ); }); // do actual refinement independently for each decomposed context let decomposed = parallel_into_collect(decomposed, |(mut refinement_ctx, route_indices)| { - let _ = (0..self.repeat_count).try_for_each(|_| { + let actual_repeat_count = get_repeat_count(self.repeat_count, refinement_ctx.environment.random.as_ref()); + + let _ = (0..actual_repeat_count).try_for_each(|_| { let insertion_ctx = refinement_ctx.selected().next().expect(GREEDY_ERROR); let insertion_ctx = self.inner_search.search(&refinement_ctx, insertion_ctx); let is_quota_reached = @@ -92,7 +81,7 @@ impl DecomposeSearch { // get new and old parts and detect if there was any improvement in any part let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = - decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed, original_insertion_ctx)).unzip(); + decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip(); let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); @@ -118,7 +107,29 @@ impl DecomposeSearch { } fn create_population(insertion_ctx: InsertionContext) -> TargetPopulation { - Box::new(GreedyPopulation::new(insertion_ctx.problem.goal.clone(), 1, Some(insertion_ctx))) + // Keep baseline and (optionally) best/last candidate without reconstructing baseline later. + Box::new(DecomposePopulation::new(insertion_ctx.problem.goal.clone(), 1, insertion_ctx)) +} + +/// Selects a repeat count from 1 to max_repeat_count using exponential decay. +/// Uses stack-allocated arrays for common cases to avoid heap allocation. 
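+///
+/// For example, with `max_repeat_count = 4` the weights `[27, 9, 3, 1]` yield
+/// 1, 2, 3 or 4 repeats with probabilities 27/40, 9/40, 3/40 and 1/40 respectively.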
+fn get_repeat_count(max_repeat_count: usize, random: &dyn Random) -> usize {
+    if max_repeat_count == 1 {
+        return 1;
+    }
+
+    // create weights with exponential decay: [3^(n-1), 3^(n-2), ..., 3^1, 3^0]
+    let index = match max_repeat_count {
+        2 => random.weighted(&[3, 1]),
+        3 => random.weighted(&[9, 3, 1]),
+        4 => random.weighted(&[27, 9, 3, 1]),
+        _ => {
+            let weights: Vec<_> = (1..=max_repeat_count).map(|i| 3_usize.pow((max_repeat_count - i) as u32)).collect();
+            // NOTE: `weighted` returns a zero-based index; it is shifted to a count once below.
+            random.weighted(&weights)
+        }
+    };
+
+    index + 1
 }
 
 fn create_multiple_insertion_contexts(
@@ -135,21 +146,24 @@ fn create_multiple_insertion_contexts(
     let max = if insertion_ctx.solution.routes.len() < max as usize { (max / 2).max(min) } else { max };
 
     // identify route groups and create contexts from them
-    let used_indices = RefCell::new(HashSet::new());
+    let mut used_indices: HashSet<usize> = HashSet::new();
     let insertion_ctxs = route_groups
         .into_iter()
         .enumerate()
-        .filter(|(outer_idx, _)| !used_indices.borrow().contains(outer_idx))
-        .map(|(outer_idx, route_group)| {
+        .filter_map(|(outer_idx, route_group)| {
+            if used_indices.contains(&outer_idx) {
+                return None;
+            }
+
             let group_size = environment.random.uniform_int(min, max) as usize;
             let route_group = once(outer_idx)
-                .chain(route_group.into_iter().filter(|inner_idx| !used_indices.borrow().contains(inner_idx)))
+                .chain(route_group.into_iter().filter(|inner_idx| !used_indices.contains(inner_idx)))
                 .take(group_size)
                 .collect::<Vec<_>>();
 
-            used_indices.borrow_mut().extend(route_group.iter().cloned());
+            used_indices.extend(route_group.iter().copied());
 
-            create_partial_insertion_ctx(insertion_ctx, environment.clone(), route_group)
+            Some(create_partial_insertion_ctx(insertion_ctx, environment.clone(), route_group))
         })
         .chain(create_empty_insertion_ctxs(insertion_ctx, environment.clone()))
         .collect();
@@ -235,11 +249,10 @@ fn decompose_insertion_context(
     insertion_ctx: &InsertionContext,
     max_routes_range: (i32, i32),
     repeat: usize,
-    quota_limit: usize,
 ) -> Option<Vec<(RefinementContext, HashSet<usize>)>> {
     // NOTE make limit a bit higher than median
     let median = refinement_ctx.statistics().speed.get_median();
-    let limit = median.map(|median| (median * repeat).max(quota_limit));
+    let limit = median.map(|median| (((median.max(10) * repeat) as f64) * 1.5) as usize);
     let environment = create_environment_with_custom_quota(limit, refinement_ctx.environment.as_ref());
 
     create_multiple_insertion_contexts(insertion_ctx, environment.clone(), max_routes_range)
@@ -262,36 +275,136 @@ fn decompose_insertion_context(
 }
 
-fn get_solution_parts(
-    decomposed: (RefinementContext, HashSet<usize>),
-    original_insertion_ctx: &InsertionContext,
-) -> ((SolutionContext, SolutionContext), bool) {
-    let (decomposed_ctx, route_indices) = decomposed;
-    let decomposed_insertion_ctx = decomposed_ctx.into_individuals().next().expect(GREEDY_ERROR);
-    let environment = original_insertion_ctx.environment.clone();
+fn get_solution_parts(decomposed: (RefinementContext, HashSet<usize>)) -> ((SolutionContext, SolutionContext), bool) {
+    let (decomposed_ctx, _) = decomposed;
+    let mut individuals = decomposed_ctx.into_individuals();
 
-    let (partial_insertion_ctx, _) = create_partial_insertion_ctx(original_insertion_ctx, environment, route_indices);
+    // Baseline is preserved by `DecomposePopulation` and yielded first.
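+    // (see the `into_iter` contract of `DecomposePopulation` below)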
+    let baseline = individuals.next().expect(GREEDY_ERROR);
+    // The second individual is always present:
+    // - if there was an improvement: the best improved solution
+    // - otherwise: the last non-improving solution (used for diversity)
+    let candidate = individuals.next().expect(GREEDY_ERROR);
 
-    let is_improvement = goal.total_order(&decomposed_insertion_ctx, &partial_insertion_ctx) == Ordering::Less;
+    let goal = baseline.problem.goal.as_ref();
 
-    ((decomposed_insertion_ctx.solution, partial_insertion_ctx.solution), is_improvement)
+    // When there is no improvement, `candidate` is the last non-improving solution for diversity.
+    // When there is an improvement, `candidate` is the best improved solution.
+    let is_improvement = goal.total_order(&candidate, &baseline) == Ordering::Less;
+
+    ((candidate.solution, baseline.solution), is_improvement)
 }
 
 fn merge_parts(source_solution: SolutionContext, accumulated: InsertionContext) -> InsertionContext {
     let mut accumulated = accumulated;
     let dest_solution = &mut accumulated.solution;
 
-    // NOTE theoretically, we can avoid deep copy here, but this would require an extension in Population trait
-    dest_solution.routes.extend(source_solution.routes.iter().map(|route_ctx| route_ctx.deep_copy()));
-    dest_solution.ignored.extend(source_solution.ignored.iter().cloned());
-    dest_solution.required.extend(source_solution.required.iter().cloned());
-    dest_solution.locked.extend(source_solution.locked.iter().cloned());
-    dest_solution.unassigned.extend(source_solution.unassigned.iter().map(|(k, v)| (k.clone(), v.clone())));
-
+    // register routes in registry before moving them
     source_solution.routes.iter().for_each(|route_ctx| {
         assert!(dest_solution.registry.use_route(route_ctx), "attempt to use route more than once");
     });
 
+    dest_solution.routes.extend(source_solution.routes);
+    dest_solution.ignored.extend(source_solution.ignored);
+    dest_solution.required.extend(source_solution.required);
+    dest_solution.locked.extend(source_solution.locked);
+    dest_solution.unassigned.extend(source_solution.unassigned);
+
     accumulated
 }
+
+/// A small population implementation used only by `DecomposeSearch`.
+///
+/// It preserves the original (baseline) individual so it can be compared against and (optionally)
+/// reused later without reconstructing or deep-copying it from the original full solution.
+///
+/// Additionally, when there is no improvement, it keeps the last non-improving candidate, which
+/// can be used to build a more diverse combined solution.
+struct DecomposePopulation {
+    objective: Arc<GoalContext>,
+    selection_size: usize,
+
+    baseline: InsertionContext,
+    best: Option<InsertionContext>,
+    last_non_improving: Option<InsertionContext>,
+}
+
+impl DecomposePopulation {
+    fn new(objective: Arc<GoalContext>, selection_size: usize, baseline: InsertionContext) -> Self {
+        Self { objective, selection_size, baseline, best: None, last_non_improving: None }
+    }
+
+    fn best_ref(&self) -> &InsertionContext {
+        self.best.as_ref().unwrap_or(&self.baseline)
+    }
+}
+
+impl HeuristicPopulation for DecomposePopulation {
+    type Objective = GoalContext;
+    type Individual = InsertionContext;
+
+    fn add_all(&mut self, individuals: Vec<Self::Individual>) -> bool {
+        if individuals.is_empty() {
+            return false;
+        }
+
+        // NOTE: call `add` first so that `||` short-circuiting cannot skip remaining individuals.
+        individuals.into_iter().fold(false, |acc, individual| self.add(individual) || acc)
+    }
+
+    fn add(&mut self, individual: Self::Individual) -> bool {
+        // Greedy update: replace best only when a strictly better solution is found.
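+        // `total_order` follows `Ord` semantics where `Less` is better: `Greater` here means the
+        // current best sorts after the candidate, i.e. the candidate strictly improves on it.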
+        if self.objective.total_order(self.best_ref(), &individual) == Ordering::Greater {
+            self.best = Some(individual);
+            // Once an improvement is found, non-improving candidates are no longer needed.
+            self.last_non_improving = None;
+            true
+        } else {
+            // Keep the last non-improving candidate for diversity (used only when no improvement happens).
+            if self.best.is_none() {
+                self.last_non_improving = Some(individual);
+            }
+            false
+        }
+    }
+
+    fn on_generation(&mut self, _: &HeuristicStatistics) {}
+
+    fn cmp(&self, a: &Self::Individual, b: &Self::Individual) -> Ordering {
+        self.objective.total_order(a, b)
+    }
+
+    fn select(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        Box::new(std::iter::repeat_n(self.best_ref(), self.selection_size))
+    }
+
+    fn ranked(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        // Not used by `DecomposeSearch`, but provide a deterministic iteration order.
+        if let Some(best) = self.best.as_ref() {
+            Box::new(once(best).chain(once(&self.baseline)))
+        } else {
+            Box::new(once(&self.baseline))
+        }
+    }
+
+    fn iter(&self) -> Box<dyn Iterator<Item = &Self::Individual> + '_> {
+        self.ranked()
+    }
+
+    fn into_iter(self: Box<Self>) -> Box<dyn Iterator<Item = Self::Individual>>
+    where
+        Self::Individual: 'static,
+    {
+        // Contract used by `get_solution_parts`:
+        // - Always yield baseline first.
+        // - Then yield either best (if any) or last non-improving (if any).
+        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving).into_iter()))
+    }
+
+    fn size(&self) -> usize {
+        1 + usize::from(self.best.is_some() || self.last_non_improving.is_some())
+    }
+
+    fn selection_phase(&self) -> SelectionPhase {
+        SelectionPhase::Exploitation
+    }
+}
diff --git a/vrp-core/src/solver/search/local/exchange_swap_star.rs b/vrp-core/src/solver/search/local/exchange_swap_star.rs
index 683f7e681..aeca49d46 100644
--- a/vrp-core/src/solver/search/local/exchange_swap_star.rs
+++ b/vrp-core/src/solver/search/local/exchange_swap_star.rs
@@ -24,17 +24,12 @@ use std::iter::once;
 pub struct ExchangeSwapStar {
     leg_selection: LegSelection,
     result_selector: Box<dyn ResultSelector + Send + Sync>,
-    quota_limit: usize,
 }
 
 impl ExchangeSwapStar {
     /// Creates a new instance of `ExchangeSwapStar`.
- pub fn new(random: Arc, quota_limit: usize) -> Self { - Self { - leg_selection: LegSelection::Stochastic(random), - result_selector: Box::::default(), - quota_limit, - } + pub fn new(random: Arc) -> Self { + Self { leg_selection: LegSelection::Stochastic(random), result_selector: Box::::default() } } } @@ -50,7 +45,8 @@ impl LocalOperator for ExchangeSwapStar { let route_pairs = create_route_pairs(insertion_ctx, ROUTE_PAIRS_THRESHOLD); // modify environment to include median as an extra quota to prevent long runs - let limit = refinement_ctx.statistics().speed.get_median().map(|median| median.max(self.quota_limit)); + let limit = + refinement_ctx.statistics().speed.get_median().map(|median| ((median.max(10) as f64) * 1.5) as usize); let mut insertion_ctx = InsertionContext { environment: create_environment_with_custom_quota(limit, insertion_ctx.environment.as_ref()), ..insertion_ctx.deep_copy() diff --git a/vrp-core/tests/unit/solver/search/decompose_search_test.rs b/vrp-core/tests/unit/solver/search/decompose_search_test.rs index c1c351085..b9826f0aa 100644 --- a/vrp-core/tests/unit/solver/search/decompose_search_test.rs +++ b/vrp-core/tests/unit/solver/search/decompose_search_test.rs @@ -54,7 +54,7 @@ fn can_perform_search() { let refinement_ctx = RefinementContext::new(problem.clone(), population, TelemetryMode::None, environment.clone()); let insertion_ctx = InsertionContext::new_from_solution(problem.clone(), (solution, None), environment.clone()); let inner_search = create_default_heuristic_operator(problem, environment); - let decompose_search = DecomposeSearch::new(inner_search, (2, 2), 10, 1000); + let decompose_search = DecomposeSearch::new(inner_search, (2, 2), 10); let result = decompose_search.search(&refinement_ctx, &insertion_ctx); diff --git a/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs b/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs index 76c65a8b5..b950b805a 100644 --- a/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs +++ b/vrp-core/tests/unit/solver/search/local/exchange_swap_star_test.rs @@ -81,7 +81,7 @@ fn can_use_exchange_swap_star_impl(jobs_order: Vec>, expected: Vec>(); assert_eq!(vehicles, vec!["0", "1", "2"]); - let insertion_ctx = ExchangeSwapStar::new(environment.random.clone(), 10000) + let insertion_ctx = ExchangeSwapStar::new(environment.random.clone()) .explore(&create_default_refinement_ctx(insertion_ctx.problem.clone()), &insertion_ctx) .expect("cannot find new solution"); @@ -101,7 +101,7 @@ fn can_keep_locked_jobs_in_place() { ); rearrange_jobs_in_routes(&mut insertion_ctx, jobs_order.as_slice()); - let insertion_ctx = ExchangeSwapStar::new(environment.random.clone(), 1000) + let insertion_ctx = ExchangeSwapStar::new(environment.random.clone()) .explore(&create_default_refinement_ctx(insertion_ctx.problem.clone()), &insertion_ctx) .expect("cannot find new solution"); From 86c9cac4725643dc6a0da09f213350ee9d50c599 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Wed, 14 Jan 2026 23:45:44 +0100 Subject: [PATCH 27/30] Improve dynamic heuristic --- rosomaxa/src/algorithms/rl/slot_machine.rs | 24 +- rosomaxa/src/hyper/dynamic_selective.rs | 405 +++++++----------- .../unit/hyper/dynamic_selective_test.rs | 74 +--- 3 files changed, 168 insertions(+), 335 deletions(-) diff --git a/rosomaxa/src/algorithms/rl/slot_machine.rs b/rosomaxa/src/algorithms/rl/slot_machine.rs index 32f20aa96..55d9402bb 100644 --- a/rosomaxa/src/algorithms/rl/slot_machine.rs +++ 
b/rosomaxa/src/algorithms/rl/slot_machine.rs @@ -51,9 +51,6 @@ where S: DistributionSampler + Clone, { /// Creates a new instance with Universal Priors. - /// - /// Since the `SearchAgent` externally normalizes rewards to standard scores (Z-scores), - /// we can use universal constants for the priors rather than problem-specific guesses. pub fn new(prior_mean: Float, action: A, sampler: S) -> Self { // Universal priors for a Standard Normal distribution N(0, 1): // Alpha = 2.0 implies a weak prior belief with mathematically defined variance. @@ -61,8 +58,8 @@ where let alpha = 2.0; let beta = 1.0; - // Prior mean is clamped to a reasonable Z-score range (-0 to 10) - let mu = prior_mean.clamp(0.01, 10.0); + // Prior mean is clamped to a reasonable reward range [0.1, 2.0]. + let mu = prior_mean.clamp(0.1, 2.0); // Variance expectation v = Beta / (Alpha - 1) = 1.0. // This implies we are "uncertain" by about +/- 1.0 standard deviation unit, @@ -102,16 +99,15 @@ where pub fn update(&mut self, feedback: &A::Feedback) { let reward = feedback.reward(); - // --- 1. Memory Decay (Non-Stationarity) --- + // 1. Memory Decay (Non-Stationarity) - // A decay factor of 0.999 implies a "memory horizon" of ~1000 samples. - // This is crucial for VRP where improvements are rare (sparse signals). - // It provides enough patience to wait for "lottery ticket" wins while still - // allowing the agent to abandon operators that stop working in later phases. - const DECAY_FACTOR: Float = 0.999; + // A decay factor of 0.995 implies a "memory horizon" of ~200 samples. + // This faster forgetting prevents operator monopolies and allows the agent + // to adapt more quickly when search dynamics change between phases. + const DECAY_FACTOR: Float = 0.995; // Decay the sufficient statistics. - // CRITICAL: We clamp alpha to >= 2.0. The variance of the Inverse-Gamma + // We clamp alpha to >= 2.0. The variance of the Inverse-Gamma // distribution is defined as Beta / (Alpha - 1). If Alpha <= 1, variance is undefined. // Keeping Alpha >= 2.0 ensures numerical stability and prevents division by zero. self.alpha = (self.alpha * DECAY_FACTOR).max(2.0); @@ -121,7 +117,7 @@ where // We do not decay this value so we can track total lifetime usage. self.n += 1; - // --- 2. Bayesian Update (Normal-Gamma) --- + // 2. Bayesian Update (Normal-Gamma) // Standard update adds 0.5 to Alpha for each new observation n=1. self.alpha += 0.5; @@ -144,7 +140,7 @@ where // contribution to the variance relative to prior knowledge. self.beta += 0.5 * (reward - old_mu).powi(2) * effective_n / (effective_n + 1.0); - // --- 3. Variance Estimation --- + // 3. Variance Estimation // Calculate expected variance E[σ²] = Beta / (Alpha - 1). // Since we enforced Alpha >= 2.0 (decayed) + 0.5 (update) = 2.5, diff --git a/rosomaxa/src/hyper/dynamic_selective.rs b/rosomaxa/src/hyper/dynamic_selective.rs index 0ebe11296..a408d85af 100644 --- a/rosomaxa/src/hyper/dynamic_selective.rs +++ b/rosomaxa/src/hyper/dynamic_selective.rs @@ -6,7 +6,7 @@ use super::*; use crate::Timer; use crate::algorithms::math::RemedianUsize; use crate::algorithms::rl::{SlotAction, SlotFeedback, SlotMachine}; -use crate::utils::{DefaultDistributionSampler, select_softmax_index}; +use crate::utils::{DefaultDistributionSampler, random_argmax}; use std::cmp::Ordering; use std::collections::HashMap; use std::fmt::Formatter; @@ -105,72 +105,28 @@ where /// Type alias for slot machines used in Thompson sampling. 
pub type SlotMachines<'a, C, O, S> = Vec<(SlotMachine, DefaultDistributionSampler>, String)>; -/// Centralized "Physics" for the Reward System. -/// -/// This struct defines the constants that control how rewards are calculated, -/// scaled, and clamped. Changing values here automatically propagates to -/// reward estimation and signal normalization logic. -struct SearchRewards; - -impl SearchRewards { - /// The base value for a standard improvement (Local Search success). - /// Used as the atomic unit for signal normalization. - pub const BASE_REWARD: Float = 2.0; - - /// The offset added to relative distance to ensure positive rewards. - /// Logic: `(distance + OFFSET)`. - pub const DISTANCE_OFFSET: Float = 1.0; - - /// Multiplier applied when a solution improves the Global Best Known. - /// This is the "Jackpot" factor. - pub const GLOBAL_BEST_MULTIPLIER: Float = 2.0; - - /// Multiplier applied when a solution is diverse but not a global improvement. - /// Keeps "Diverse" operators alive but with low signal. - pub const DIVERSE_MULTIPLIER: Float = 0.01; - - /// The maximum percentage (+/-) that execution duration can affect the reward. - /// e.g., 0.2 means performance can scale reward by [0.8, 1.2]. - pub const PERF_TOLERANCE: Float = 0.2; - - /// Softmax/Boltzmann policy temperature range (over Thompson samples). - /// Higher temperature => more exploration. - pub const SOFTMAX_TEMPERATURE_MIN: Float = 0.3; - pub const SOFTMAX_TEMPERATURE_MAX: Float = 2.0; - - /// The "Cheap Failure" - /// The penalty applied when an operator produces no improvement. - /// A small negative value ensures that "doing nothing" is worse than "finding diverse solutions". - /// This forces the Slot Machine to eventually lower the confidence of stagnating operators. - pub const PENALTY_MIN: Float = -0.5; - - /// The "Expensive Failure" - /// The penalty applied when an operator produces a negative outcome. - /// A larger negative value ensures that failures are strongly discouraged. - pub const PENALTY_MAX: Float = -2.0; - - /// Calculates the ratio between the "Jackpot" and the "Normal" signal. - /// Used by the Agent to determine how many times larger than the average - /// a signal must be to be considered an "Outlier" that needs clamping. - pub fn signal_clamp_ratio(max_dist: Float) -> Float { - // Recalculate based on new constants - let base_term = max_dist + Self::DISTANCE_OFFSET; - let global_term = base_term * Self::GLOBAL_BEST_MULTIPLIER; - let max_theoretical = (base_term + global_term) * (1.0 + Self::PERF_TOLERANCE); - - let typical_reward = (1.0 + Self::DISTANCE_OFFSET) * Self::BASE_REWARD; - - max_theoretical / typical_reward - } +/// Base reward for finding a new global best solution. +/// This is the "jackpot" that operators compete for. +const JACKPOT_BASE: Float = 2.0; - /// Derives a temperature based on the search progress. - /// When the search plateaus, we increase exploration. - pub fn softmax_temperature(improvement_1000_ratio: Float) -> Float { - // In other places we treat 10% improvement as "fast flow". - let flow = (improvement_1000_ratio * 10.0).clamp(0.0, 1.0); - Self::SOFTMAX_TEMPERATURE_MIN + (1.0 - flow) * (Self::SOFTMAX_TEMPERATURE_MAX - Self::SOFTMAX_TEMPERATURE_MIN) - } -} +/// Maximum reward for diverse improvements (soft ceiling via tanh saturation). +/// Must be well below JACKPOT_BASE to ensure exploitation of best-finding operators. +/// At 1.0, jackpots are guaranteed to be at least 2x more valuable than any diverse improvement. 
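+/// (Even with the efficiency multiplier from `compute_reward`, which is capped at 1.2, a diverse
+/// improvement scores at most 1.2, still well below the minimum jackpot of 2.0.)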
+const DIVERSE_CAP: Float = 1.0; + +/// Minimum reward for any improvement (prevents near-zero rewards). +/// Set high enough to be a meaningful positive signal. +const MIN_REWARD: Float = 0.1; + +/// Penalty scale for failures (multiplied by time ratio). +/// A failure that takes median time costs -0.1; twice median costs -0.2. +const PENALTY_SCALE: Float = 0.1; + +/// Floor for rewards (maximum penalty). +const REWARD_MIN: Float = -1.0; + +/// Ceiling for rewards (maximum jackpot + efficiency bonus). +const REWARD_MAX: Float = 10.0; /// Search state for Thompson sampling. #[derive(PartialEq, Eq, Hash, Clone, Debug)] @@ -230,29 +186,9 @@ where let duration = duration.as_millis() as usize; - // 1. Analyze result. - let distance_score = estimate_distance_reward(context.heuristic_ctx, context.solution, &new_solution); - - // 2. Calculate final reward / penalty. - let reward = match distance_score { - // Success: Apply performance multiplier (bonus/penalty within +/- 20%). - Some(base_score) => { - let mult = estimate_reward_perf_multiplier(&context, duration); - base_score * mult - } - // Failure: Apply time-weighted penalty. - None => { - // Get median (defensive default to duration itself to avoid div/0). - let median = context.approx_median.unwrap_or(duration).max(1) as Float; - let ratio = duration as Float / median; - - // Base penalty unit: PENALTY_MIN (-0.01) per "Median Unit of Time". - let raw_penalty = SearchRewards::PENALTY_MIN * ratio; - - // Clamp to the range [PENALTY_MAX, PENALTY_MIN] = [-0.1, -0.01]. - raw_penalty.clamp(SearchRewards::PENALTY_MAX, SearchRewards::PENALTY_MIN) - } - }; + // Compute reward using the simplified V2.1 formula. + let reward = + compute_reward(context.heuristic_ctx, context.solution, &new_solution, duration, context.approx_median); let is_new_best = compare_to_best(context.heuristic_ctx, &new_solution) == Ordering::Less; let to = if is_new_best { SearchState::BestKnown } else { SearchState::Diverse }; @@ -279,11 +215,11 @@ where } struct SearchAgent<'a, C, O, S> { - // Separate learning contexts for different search phases + /// Separate learning contexts for different search phases (BestKnown vs Diverse). slot_machines: HashMap>, - // Shared scale invariance (universal physics) - signal_stats: SignalStats, + /// Tracks operator durations for median calculation. tracker: HeuristicTracker, + /// Random number generator for Thompson sampling selection. random: Arc, } @@ -294,24 +230,28 @@ where S: HeuristicSolution + 'a, { pub fn new(search_operators: HeuristicSearchOperators, environment: &Environment) -> Self { - // Calculate the Mean Weight - // We want the "Average Operator" to have a prior mu = 1.0. - // This aligns with SignalStats which normalizes the average reward to 1.0. + // Normalize weights so the average operator has prior_mean ≈ 1.0. + // This aligns with typical success rewards (~1-3 range). let total_weight: Float = search_operators.iter().map(|(_, _, w)| *w).sum(); let count = search_operators.len() as Float; + let avg_weight = if count > 0.0 && total_weight > f64::EPSILON { total_weight / count } else { 1.0 }; - let avg_weight = if count > 0.0 { total_weight / count } else { 1.0 }; - - // Avoid division by zero if weights are weird - let target_mean = SearchRewards::BASE_REWARD; - let scale = if avg_weight > f64::EPSILON { target_mean / avg_weight } else { target_mean }; - - // Factory function to create identical slot configurations for each state + // Factory function to create slot configurations for each state. 
+ // Uses domain knowledge (initial weights) as priors - important because: + // 1. We have many operators (cold start problem) + // 2. Limited search time may not be enough to learn from scratch + // 3. Weights encode expert knowledge about operator effectiveness let create_slots = || { search_operators .iter() .map(|(operator, name, initial_weight)| { - let prior_mean = initial_weight * scale; + // Smooth mapping of weight ratio to prior mean range [0.1, 3.0]. + // Uses tanh for smooth compression without hard cutoffs. + // ratio=1 (average) → prior=1.0, higher ratios → up to 3.0, lower → down to 0.1 + let ratio = initial_weight / avg_weight; + let t = (ratio - 1.0).tanh(); // smooth compression to [-1, 1] + // Asymmetric scaling: [−1,0] → [0.1,1.0], [0,1] → [1.0,3.0] + let prior_mean = if t >= 0.0 { 1.0 + t * 2.0 } else { 1.0 + t * 0.9 }; ( SlotMachine::new( prior_mean, @@ -324,20 +264,19 @@ where .collect::>() }; - // Initialize separate states with identical priors but independent learning + // Initialize separate states with identical priors but independent learning. let slot_machines = once((SearchState::BestKnown, create_slots())) .chain(once((SearchState::Diverse, create_slots()))) .collect(); Self { slot_machines, - signal_stats: SignalStats::new(), tracker: HeuristicTracker::new(environment.is_experimental), random: environment.random.clone(), } } - /// Picks the relevant search operator based on learnings and runs the search. + /// Picks the relevant search operator using pure Thompson Sampling and runs the search. pub fn search(&self, heuristic_ctx: &C, solution: &S) -> SearchFeedback { // Determine search context - critical for operator selection. let from = if matches!(compare_to_best(heuristic_ctx, solution), Ordering::Equal) { @@ -348,9 +287,10 @@ where // Get contextually appropriate slot machines. let slots = self.slot_machines.get(&from).expect("cannot get slot machines"); - let temperature = SearchRewards::softmax_temperature(heuristic_ctx.statistics().improvement_1000_ratio); + + // Sample each arm, pick argmax with random tie-break. let samples = slots.iter().map(|(slot, _)| slot.sample()).collect::>(); - let slot_idx = select_softmax_index(&samples, temperature, self.random.as_ref()); + let slot_idx = random_argmax(samples.into_iter(), self.random.as_ref()).unwrap_or(0); let slot_machine = &slots[slot_idx].0; let approx_median = self.tracker.approx_median(); @@ -359,35 +299,13 @@ where slot_machine.play(SearchContext { heuristic_ctx, from, slot_idx, solution, approx_median }) } - /// Updates estimations based on search feedback with protected signal baseline. + /// Updates the slot machine with the raw reward (no normalization needed). pub fn update(&mut self, generation: usize, feedback: &SearchFeedback) { - let max_dist = feedback.solution.as_ref().map_or(1., |s| s.fitness().count() as Float); - let raw_reward = feedback.sample.reward; - let current_scale = self.signal_stats.scale(); - - // 1. Update ruler. - // We only update the "Scale of Success" based on positive outcomes. - // We do NOT shrink the ruler just because we failed. - if raw_reward > f64::EPSILON { - let clamp_limit = current_scale * SearchRewards::signal_clamp_ratio(max_dist); - self.signal_stats.update(raw_reward.min(clamp_limit)); - } - - // 2. Normalize. 
- let normalized_reward = - if raw_reward.abs() > f64::EPSILON { raw_reward / self.signal_stats.scale() } else { 0.0 }; - let normalized_feedback = SearchFeedback { - sample: SearchSample { reward: normalized_reward, ..feedback.sample.clone() }, - slot_idx: feedback.slot_idx, - solution: None, - }; - - // 3. Update contextual slot machine. let from = &feedback.sample.transition.0; let slots = self.slot_machines.get_mut(from).expect("cannot get slot machines"); - slots[feedback.slot_idx].0.update(&normalized_feedback); + slots[feedback.slot_idx].0.update(feedback); - // 4. Observe telemetry. + // Track telemetry. self.tracker.observe_sample(generation, feedback.sample.clone()); } @@ -409,105 +327,106 @@ where } } -fn compare_to_best(heuristic_ctx: &C, solution: &S) -> Ordering +/// Computes the reward for an operator based on solution improvement. +/// +/// Key design principles: +/// 1. **Best-Known Anchoring**: Improvements far from best get diminished credit. +/// 2. **Stagnation-Aware Efficiency**: Slow operators tolerated during stagnation. +/// 3. **Bounded Output**: Rewards in [-1, 5] for stable Bayesian updates. +fn compute_reward( + heuristic_ctx: &C, + initial_solution: &S, + new_solution: &S, + duration: usize, + approx_median: Option, +) -> Float where C: HeuristicContext, O: HeuristicObjective, S: HeuristicSolution, { - heuristic_ctx - .ranked() - .next() - .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known)) - .unwrap_or(Ordering::Less) -} + let objective = heuristic_ctx.objective(); -/// Estimates new solution discovery reward based on distance metric. -/// Returns `Some(reward)` for improvement, or `None` for stagnation. -fn estimate_distance_reward(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option -where - C: HeuristicContext, - O: HeuristicObjective, - S: HeuristicSolution, -{ - heuristic_ctx - .ranked() - .next() - .map(|best_known| { - let objective = heuristic_ctx.objective(); - - // Calculate normalized relative distances [0, 1]. - let distance_initial = get_relative_distance(objective, new_solution, initial_solution); - let distance_best = get_relative_distance(objective, new_solution, best_known); - - let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD; - - match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) { - // Global Jackpot - (Ordering::Greater, Ordering::Greater) => { - let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET) - * SearchRewards::BASE_REWARD - * SearchRewards::GLOBAL_BEST_MULTIPLIER; - Some(reward_initial + reward_best) - } + // Get best known solution for anchoring. + let best_known = match heuristic_ctx.ranked().next() { + Some(best) => best, + None => return 0.0, // No population yet, neutral reward. + }; - // Local/Diverse Improvement - (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER), + // Determine improvement types. + let is_new_best = objective.total_order(new_solution, best_known) == Ordering::Less; + let is_improvement = objective.total_order(new_solution, initial_solution) == Ordering::Less; + + // COMPUTE REWARD BASED ON IMPROVEMENT TYPE + let raw_reward = if is_new_best { + // JACKPOT: Found new global best! + // Apply magnitude scaling so small improvements become meaningful signals. + // Raw distance is often tiny (0.001 for 0.1% improvement). 
@@ -409,105 +327,106 @@ where
     }
 }

-fn compare_to_best<C, O, S>(heuristic_ctx: &C, solution: &S) -> Ordering
+/// Computes the reward for an operator based on solution improvement.
+///
+/// Key design principles:
+/// 1. **Best-Known Anchoring**: Improvements far from best get diminished credit.
+/// 2. **Stagnation-Aware Efficiency**: Slow operators tolerated during stagnation.
+/// 3. **Bounded Output**: Rewards in [-1, 5] for stable Bayesian updates.
+fn compute_reward<C, O, S>(
+    heuristic_ctx: &C,
+    initial_solution: &S,
+    new_solution: &S,
+    duration: usize,
+    approx_median: Option<usize>,
+) -> Float
 where
     C: HeuristicContext<Objective = O, Solution = S>,
     O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    heuristic_ctx
-        .ranked()
-        .next()
-        .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known))
-        .unwrap_or(Ordering::Less)
-}
+    let objective = heuristic_ctx.objective();

-/// Estimates new solution discovery reward based on distance metric.
-/// Returns `Some(reward)` for improvement, or `None` for stagnation.
-fn estimate_distance_reward<C, O, S>(heuristic_ctx: &C, initial_solution: &S, new_solution: &S) -> Option<Float>
-where
-    C: HeuristicContext<Objective = O, Solution = S>,
-    O: HeuristicObjective<Solution = S>,
-    S: HeuristicSolution,
-{
-    heuristic_ctx
-        .ranked()
-        .next()
-        .map(|best_known| {
-            let objective = heuristic_ctx.objective();
-
-            // Calculate normalized relative distances [0, 1].
-            let distance_initial = get_relative_distance(objective, new_solution, initial_solution);
-            let distance_best = get_relative_distance(objective, new_solution, best_known);
-
-            let reward_initial = (distance_initial + SearchRewards::DISTANCE_OFFSET) * SearchRewards::BASE_REWARD;
-
-            match (distance_initial.total_cmp(&0.), distance_best.total_cmp(&0.)) {
-                // Global Jackpot
-                (Ordering::Greater, Ordering::Greater) => {
-                    let reward_best = (distance_best + SearchRewards::DISTANCE_OFFSET)
-                        * SearchRewards::BASE_REWARD
-                        * SearchRewards::GLOBAL_BEST_MULTIPLIER;
-                    Some(reward_initial + reward_best)
-                }
+    // Get the best known solution for anchoring.
+    let best_known = match heuristic_ctx.ranked().next() {
+        Some(best) => best,
+        None => return 0.0, // No population yet, neutral reward.
+    };

-                // Local/Diverse Improvement
-                (Ordering::Greater, _) => Some(reward_initial * SearchRewards::DIVERSE_MULTIPLIER),
+    // Determine improvement types.
+    let is_new_best = objective.total_order(new_solution, best_known) == Ordering::Less;
+    let is_improvement = objective.total_order(new_solution, initial_solution) == Ordering::Less;
+
+    // Compute the reward based on the improvement type.
+    let raw_reward = if is_new_best {
+        // JACKPOT: Found a new global best!
+        // Apply magnitude scaling so small improvements become meaningful signals.
+        // Raw distance is often tiny (0.001 for a 0.1% improvement).
+        // ln_1p(x * 1000) transforms: 0.001 -> ~0.69, 0.01 -> ~2.4, 0.1 -> ~4.6
+        let improvement_distance = get_relative_distance(new_solution, initial_solution);
+        let magnitude = (improvement_distance * 1000.0).ln_1p();
+        JACKPOT_BASE + magnitude
+    } else if is_improvement {
+        // DIVERSE IMPROVEMENT: Better than the starting point, but not a global best.
+        // Use tanh for soft saturation - large improvements asymptote to DIVERSE_CAP,
+        // ensuring diverse rewards stay well below JACKPOT_BASE.
+        let improvement_distance = get_relative_distance(new_solution, initial_solution);
+        let magnitude = (improvement_distance * 1000.0).ln_1p();
+        let saturated = magnitude.tanh();
+
+        // Proximity to best: closer solutions get a higher reward.
+        let gap_to_best = get_relative_distance(new_solution, best_known);
+        let proximity_factor = (1.0 - gap_to_best).powi(2);
+
+        // Base utility: soft-capped and bounded to [MIN_REWARD, DIVERSE_CAP].
+        let base_utility = (DIVERSE_CAP * saturated * proximity_factor).clamp(MIN_REWARD, DIVERSE_CAP);
+
+        // Apply efficiency modulation: fast improvements get a bonus, slow ones a penalty.
+        let median = approx_median.unwrap_or(duration.max(1)).max(1) as Float;
+        let improvement_ratio = heuristic_ctx.statistics().improvement_1000_ratio;
+
+        // "Flow" measures search progress: 0 = stagnating, 1 = fast progress.
+        let flow = (improvement_ratio * 10.0).clamp(0.0, 1.0);
+
+        // Efficiency clamp range adapts to the search phase (reduced impact):
+        // - Stagnation (flow=0): [0.9, 1.1] - 10% penalty to 10% bonus
+        // - Fast progress (flow=1): [0.8, 1.2] - 20% penalty to 20% bonus
+        let min_eff = 0.9 - flow * 0.1;
+        let max_eff = 1.1 + flow * 0.1;
+
+        let efficiency = (median / duration as Float).clamp(min_eff, max_eff);
+
+        base_utility * efficiency
+    } else {
+        // FAILURE: time-proportional penalty.
+        let median = approx_median.unwrap_or(duration.max(1)).max(1) as Float;
+        let time_ratio = duration as Float / median;
+        -PENALTY_SCALE * time_ratio
+    };

-                // Stagnation
-                _ => None,
-            }
-        })
-        .unwrap_or(None)
+    // Clamp to the bounded range for stable Bayesian updates.
+    raw_reward.clamp(REWARD_MIN, REWARD_MAX)
 }
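
A small worked example makes the shape of these rewards visible. The values of JACKPOT_BASE and DIVERSE_CAP are assumed for illustration only (this patch pins just the overall [-1, 5] bounds); the formulas are the ones from compute_reward above:

    const JACKPOT_BASE: f64 = 2.0; // assumed
    const DIVERSE_CAP: f64 = 1.0;  // assumed
    const REWARD_MIN: f64 = -1.0;
    const REWARD_MAX: f64 = 5.0;

    fn main() {
        // New global best with a 0.1% relative improvement:
        let distance: f64 = 0.001;
        let jackpot = JACKPOT_BASE + (distance * 1000.0).ln_1p();
        println!("jackpot reward: {:.2}", jackpot.clamp(REWARD_MIN, REWARD_MAX)); // ~2.69

        // Diverse improvement of 1% that sits 5% away from the best known:
        let saturated = (0.01f64 * 1000.0).ln_1p().tanh(); // ~0.98
        let proximity = (1.0 - 0.05f64).powi(2);           // ~0.90
        let diverse = DIVERSE_CAP * saturated * proximity;
        println!("diverse reward: {:.2}", diverse.clamp(REWARD_MIN, REWARD_MAX)); // ~0.89
    }

Under these assumptions even a sizeable diverse improvement stays below the smallest jackpot, which is exactly the ordering the doc comment promises.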
-/// Estimates performance of the used operation based on its duration and the current search phase.
-/// Returns a reward multiplier in the range `[0.8, 1.2]`.
-fn estimate_reward_perf_multiplier<C, O, S>(search_ctx: &SearchContext<C, O, S>, duration: usize) -> Float
+fn compare_to_best<C, O, S>(heuristic_ctx: &C, solution: &S) -> Ordering
 where
     C: HeuristicContext<Objective = O, Solution = S>,
     O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    let improvement_ratio = search_ctx.heuristic_ctx.statistics().improvement_1000_ratio;
-    let approx_median = &search_ctx.approx_median;
-    let median = match approx_median {
-        Some(m) if *m > 0 => *m as Float,
-        _ => return 1.0,
-    };
-
-    let time_ratio = duration as Float / median;
-
-    // Calculate the raw time modifier (logarithmic).
-    let raw_modifier = (1.0 / time_ratio).ln() * 0.15;
-
-    // Apply phase-dependent damping.
-    // Smooth transition from 0.0 to 1.0 based on improvement ratio.
-    // We saturate at 0.1 (10% improvement is considered "Fast Flow").
-    let phase_damping = (improvement_ratio * 10.0).clamp(0.0, 1.0);
-    let final_modifier = raw_modifier * phase_damping;
-
-    // Final safety clamp defined by reward physics.
-    let tolerance = SearchRewards::PERF_TOLERANCE;
-    (1.0 + final_modifier).clamp(1.0 - tolerance, 1.0 + tolerance)
+    heuristic_ctx
+        .ranked()
+        .next()
+        .map(|best_known| heuristic_ctx.objective().total_order(solution, best_known))
+        .unwrap_or(Ordering::Less)
 }

-/// Returns the normalized distance in `[0.0, 1.0]` (absolute magnitude)
-/// where 1.0 = Improvement on Primary Objective.
-/// Returns a negative value if worse.
-fn get_relative_distance<O, S>(objective: &O, a: &S, b: &S) -> Float
+/// Returns the normalized distance in `[0.0, 1.0]`.
+fn get_relative_distance<S>(a: &S, b: &S) -> Float
 where
-    O: HeuristicObjective<Solution = S>,
     S: HeuristicSolution,
 {
-    let order = objective.total_order(a, b);
-
-    let sign = match order {
-        Ordering::Less => 1.,
-        Ordering::Greater => -1.,
-        Ordering::Equal => return 0.,
-    };
-
+    // Find the first differing fitness component.
     let idx = a
         .fitness()
         .zip(b.fitness())
@@ -515,69 +434,31 @@ where
         .find(|(_, (fitness_a, fitness_b))| fitness_a != fitness_b)
        .map(|(idx, _)| idx);

-    let idx = if let Some(idx) = idx {
-        idx
-    } else {
-        return 0.;
+    let idx = match idx {
+        Some(idx) => idx,
+        None => return 0., // All fitness values equal.
     };

     let total_objectives = a.fitness().count();
-    assert_ne!(total_objectives, 0, "cannot have an empty objective here");
-    assert_ne!(total_objectives, idx, "cannot have the index equal to total amount of objectives");
+    if total_objectives == 0 || total_objectives == idx {
+        return 0.;
+    }

-    // Normalization: Divide by total_objectives to map to [0, 1].
+    // Priority amplifier: earlier objectives matter more.
     let priority_amplifier = (total_objectives - idx) as Float / total_objectives as Float;

+    // Relative difference in the differing component.
     let value = a
         .fitness()
         .nth(idx)
         .zip(b.fitness().nth(idx))
-        .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()))
-        .expect("cannot get fitness by idx");
-
-    value * sign * priority_amplifier
-}
+        .map(|(a, b)| (a - b).abs() / a.abs().max(b.abs()).max(f64::EPSILON))
+        .unwrap_or(0.0);

-/// Signal tracker that observes ONLY positive values to establish a baseline for "Success".
-/// Uses exponential moving average for stability in sparse signals.
-#[derive(Clone)]
-struct SignalStats {
-    mean: Float,
-    n: Float,
-}
-
-impl SignalStats {
-    fn new() -> Self {
-        Self { mean: 0.0, n: 0.0 }
-    }
-
-    /// Observes ONLY positive values to establish a baseline for "Success".
-    fn update(&mut self, value: Float) {
-        if value <= f64::EPSILON {
-            return;
-        }
-
-        // Horizon: Adapt to the scale of the last ~200 successful operations.
-        // This is structural (adaptation speed), not problem-specific.
-        let window_size = 200.0;
-        let decay = 1.0 - (1.0 / window_size);
-
-        self.n = self.n * decay + 1.0;
-
-        // Exponential moving average of the magnitude.
-        // We use this instead of Welford's variance for stability in sparse signals.
-        let learning_rate = 1.0 / self.n;
-        self.mean = self.mean * (1.0 - learning_rate) + value * learning_rate;
-    }
-
-    /// Returns the scale factor.
-    /// If we haven't seen enough data, return 1.0 to avoid division by zero.
-    fn scale(&self) -> Float {
-        if self.mean < f64::EPSILON { 1.0 } else { self.mean }
-    }
-}
+    value * priority_amplifier
+}

-/// Enhanced diagnostic tracker for Thompson sampling analysis.
+/// Diagnostic tracker for Thompson sampling analysis.
 struct HeuristicTracker {
     total_median: RemedianUsize,
     search_telemetry: Vec<(usize, SearchSample)>,
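
The priority amplifier is easiest to see on plain fitness vectors. A standalone restatement of the same arithmetic, with names assumed and the logic mirrored from the hunk above:

    // Normalized distance between two fitness vectors: relative gap in the
    // first differing component, weighted so earlier objectives count more.
    fn relative_distance(a: &[f64], b: &[f64]) -> f64 {
        let Some(idx) = a.iter().zip(b).position(|(x, y)| x != y) else {
            return 0.0; // all components equal
        };
        let amplifier = (a.len() - idx) as f64 / a.len() as f64;
        let (x, y) = (a[idx], b[idx]);
        (x - y).abs() / x.abs().max(y.abs()).max(f64::EPSILON) * amplifier
    }

    fn main() {
        // Primary objective differs: full weight (amplifier = 1.0).
        assert!((relative_distance(&[90.0, 10.0], &[100.0, 10.0]) - 0.1).abs() < 1e-9);
        // Only the secondary differs: same 10% gap, halved by the amplifier.
        assert!((relative_distance(&[10.0, 90.0], &[10.0, 100.0]) - 0.05).abs() < 1e-9);
    }

Note that the sign is gone on purpose: the new reward logic decides improvement vs. regression via `total_order`, so the distance only has to report magnitude.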
diff --git a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
index 4d0449243..43298f42d 100644
--- a/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
+++ b/rosomaxa/tests/unit/hyper/dynamic_selective_test.rs
@@ -56,58 +56,24 @@ fn can_estimate_median() {
     assert!(median > 0);
 }

-parameterized_test! {can_estimate_reward_multiplier, (improvement_ratio, approx_median, duration, expected), {
-    can_estimate_reward_multiplier_impl(improvement_ratio, approx_median, duration, expected);
+parameterized_test! {can_compute_relative_distance, (fitness_a, fitness_b, expected), {
+    can_compute_relative_distance_impl(fitness_a, fitness_b, expected);
 }}

-can_estimate_reward_multiplier! {
-    case_01_fast_with_improvement: (0.1, Some(10), 5, 1.104), // Fast operator during improvement: ln(2.0)*0.15*1.0 ≈ 0.104
-    case_02_slow_with_improvement: (0.1, Some(10), 20, 0.896), // Slow operator during improvement: ln(0.5)*0.15*1.0 ≈ -0.104
-    case_03_fast_partial_improvement: (0.05, Some(10), 5, 1.052), // Fast with 50% damping: ln(2.0)*0.15*0.5 ≈ 0.052
-    case_04_no_improvement: (0.0, Some(10), 5, 1.0), // No improvement = no bonus
-    case_05_no_median: (0.1, None, 5, 1.0), // No median = baseline
-    case_06_clamped_fast: (0.1, Some(10), 1, 1.2), // Very fast but clamped to PERF_TOLERANCE (0.2)
-    case_07_clamped_slow: (0.1, Some(10), 100, 0.8), // Very slow but clamped to -PERF_TOLERANCE (-0.2)
+can_compute_relative_distance! {
+    case_01_improvement: (vec![90.0], vec![100.0], 0.1), // 10% distance: |100-90|/100 = 0.1
+    case_02_regression: (vec![110.0], vec![100.0], 0.09), // 9% distance: |110-100|/110 ≈ 0.09
+    case_03_equal: (vec![100.0], vec![100.0], 0.0), // Equal = no distance
+    case_04_primary_priority: (vec![90.0, 100.0], vec![100.0, 90.0], 0.1), // Primary objective distance
 }

-fn can_estimate_reward_multiplier_impl(
-    improvement_ratio: Float,
-    approx_median: Option<usize>,
-    duration: usize,
-    expected: Float,
-) {
-    // Create a mock context with the specified improvement ratio
-    let mut heuristic_ctx = create_default_heuristic_context();
-
-    // Simulate improvement ratio by triggering generations
-    if improvement_ratio > 0.0 {
-        let num_improvements = (improvement_ratio * 1000.0) as usize;
-
-        // Add improving solutions
-        for i in 0..num_improvements {
-            let solution = VectorSolution::new(vec![], -(i as Float), vec![]);
-            heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start());
-        }
-
-        // Add non-improving solutions
-        for _ in num_improvements..1000 {
-            let solution = VectorSolution::new(vec![], 100.0, vec![]);
-            heuristic_ctx.on_generation(vec![solution], 0.0, Timer::start());
-        }
-    }
-
-    let solution = VectorSolution::new(vec![], 0., vec![]);
-    let search_ctx = SearchContext {
-        heuristic_ctx: &heuristic_ctx,
-        from: SearchState::BestKnown,
-        slot_idx: 0,
-        solution: &solution,
-        approx_median,
-    };
+fn can_compute_relative_distance_impl(fitness_a: Vec<Float>, fitness_b: Vec<Float>, expected: Float) {
+    let solution_a = VectorSolution::new(vec![], fitness_a.first().copied().unwrap_or(0.0), fitness_a);
+    let solution_b = VectorSolution::new(vec![], fitness_b.first().copied().unwrap_or(0.0), fitness_b);

-    let result = estimate_reward_perf_multiplier(&search_ctx, duration);
+    let result = get_relative_distance(&solution_a, &solution_b);

-    assert!((result - expected).abs() < 0.001, "Expected {expected}, got {result}");
"Expected ~{expected}, got {result}"); } #[test] @@ -132,18 +98,8 @@ fn can_display_heuristic_info() { } #[test] -fn can_handle_when_objective_lies() { - struct LiarObjective; - - impl HeuristicObjective for LiarObjective { - type Solution = TestData; - - fn total_order(&self, _: &Self::Solution, _: &Self::Solution) -> Ordering { - // that is where it lies based on some non-fitness related factors for total order - Ordering::Greater - } - } - +fn can_handle_equal_fitness_solutions() { + // Test that solutions with identical fitness return 0 distance. struct TestData; impl HeuristicSolution for TestData { @@ -157,7 +113,7 @@ fn can_handle_when_objective_lies() { } } - let distance = get_relative_distance(&LiarObjective, &TestData, &TestData); + let distance = get_relative_distance(&TestData, &TestData); assert_eq!(distance, 0.) } From 29f7884d0deaa0a9af289361098cc0dbbb5f03f8 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 15 Jan 2026 00:02:31 +0100 Subject: [PATCH 28/30] Split decompose search --- vrp-core/src/solver/heuristic.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/vrp-core/src/solver/heuristic.rs b/vrp-core/src/solver/heuristic.rs index a0d59e807..5b05513fa 100644 --- a/vrp-core/src/solver/heuristic.rs +++ b/vrp-core/src/solver/heuristic.rs @@ -578,8 +578,12 @@ mod dynamic { "local_swap_star".to_string(), 2., ), - // combined decompose search with different inner heuristics - (create_combined_decompose_search(problem, environment), "decompose_search".to_string(), 2.), + ( + create_variable_search_decompose_search(problem.clone(), environment.clone()), + "variable_decompose_search".to_string(), + 2., + ), + (create_composite_decompose_search(problem, environment), "composite_decompose_search".to_string(), 2.), ] } @@ -707,16 +711,6 @@ mod dynamic { 2, )) } - - fn create_combined_decompose_search(problem: Arc, environment: Arc) -> TargetSearchOperator { - Arc::new(WeightedHeuristicOperator::new( - vec![ - create_variable_search_decompose_search(problem.clone(), environment.clone()), - create_composite_decompose_search(problem, environment), - ], - vec![1, 1], - )) - } } fn get_recreate_with_alternative_goal( From 49e1878bbca315fedd407b5675832fbc9ef27558 Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Thu, 15 Jan 2026 00:03:50 +0100 Subject: [PATCH 29/30] Fix clippy warnings --- vrp-core/src/solver/search/decompose_search.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs index 139e319d9..a2db2d9fd 100644 --- a/vrp-core/src/solver/search/decompose_search.rs +++ b/vrp-core/src/solver/search/decompose_search.rs @@ -81,7 +81,7 @@ impl DecomposeSearch { // get new and old parts and detect if there was any improvement in any part let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) = - decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip(); + decomposed.into_iter().map(get_solution_parts).unzip(); let has_improvements = improvements.iter().any(|is_improvement| *is_improvement); @@ -348,7 +348,7 @@ impl HeuristicPopulation for DecomposePopulation { return false; } - individuals.into_iter().fold(false, |acc, individual| acc || self.add(individual)) + individuals.into_iter().any(|individual| self.add(individual)) } fn add(&mut self, individual: Self::Individual) -> bool { @@ -397,7 +397,7 @@ impl HeuristicPopulation for DecomposePopulation { // Contract used by 
From 49e1878bbca315fedd407b5675832fbc9ef27558 Mon Sep 17 00:00:00 2001
From: reinterpretcat
Date: Thu, 15 Jan 2026 00:03:50 +0100
Subject: [PATCH 29/30] Fix clippy warnings

---
 vrp-core/src/solver/search/decompose_search.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vrp-core/src/solver/search/decompose_search.rs b/vrp-core/src/solver/search/decompose_search.rs
index 139e319d9..a2db2d9fd 100644
--- a/vrp-core/src/solver/search/decompose_search.rs
+++ b/vrp-core/src/solver/search/decompose_search.rs
@@ -81,7 +81,7 @@ impl DecomposeSearch {

         // get new and old parts and detect if there was any improvement in any part
         let ((new_parts, old_parts), improvements): ((Vec<_>, Vec<_>), Vec<_>) =
-            decomposed.into_iter().map(|decomposed| get_solution_parts(decomposed)).unzip();
+            decomposed.into_iter().map(get_solution_parts).unzip();

         let has_improvements = improvements.iter().any(|is_improvement| *is_improvement);

@@ -348,7 +348,7 @@ impl HeuristicPopulation for DecomposePopulation {
             return false;
         }

-        individuals.into_iter().fold(false, |acc, individual| acc || self.add(individual))
+        individuals.into_iter().any(|individual| self.add(individual))
     }

     fn add(&mut self, individual: Self::Individual) -> bool {
@@ -397,7 +397,7 @@ impl HeuristicPopulation for DecomposePopulation {
         // Contract used by `get_solution_parts`:
         // - Always yield baseline first.
         // - Then yield either best (if any) or last non-improving (if any).
-        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving).into_iter()))
+        Box::new(once(self.baseline).chain(self.best.or(self.last_non_improving)))
     }

     fn size(&self) -> usize {

From 6b50a837be15be6cab491a733819d210574a649a Mon Sep 17 00:00:00 2001
From: reinterpretcat
Date: Thu, 15 Jan 2026 00:25:35 +0100
Subject: [PATCH 30/30] Delete dead code

---
 rosomaxa/src/utils/random.rs | 46 ------------------------------------
 1 file changed, 46 deletions(-)

diff --git a/rosomaxa/src/utils/random.rs b/rosomaxa/src/utils/random.rs
index bb51da416..4bbeb1909 100644
--- a/rosomaxa/src/utils/random.rs
+++ b/rosomaxa/src/utils/random.rs
@@ -219,49 +219,3 @@ where
         })
         .map(|(idx, _)| idx)
 }
-
-/// Selects an index from samples using softmax (Boltzmann) distribution with given temperature.
-pub fn select_softmax_index(samples: &[Float], temperature: Float, random: &dyn Random) -> usize {
-    assert!(!samples.is_empty());
-
-    // If we ever see +inf, treat it as a hard-max tie among +inf arms.
-    if samples.iter().any(|&s| s.is_infinite() && s.is_sign_positive()) {
-        let idxs = samples
-            .iter()
-            .enumerate()
-            .filter(|(_, s)| s.is_infinite() && s.is_sign_positive())
-            .map(|(idx, _)| idx)
-            .collect::<Vec<_>>();
-        let pick = random.uniform_int(0, (idxs.len() - 1) as i32) as usize;
-        return idxs[pick];
-    }
-
-    let temperature = temperature.max(f64::EPSILON);
-    let max_sample = samples.iter().copied().filter(|s| s.is_finite()).max_by(|a, b| a.total_cmp(b)).unwrap_or(0.0);
-
-    // Stable softmax: w_i = exp((s_i - max_s) / T). Since (s_i - max_s) <= 0,
-    // weights are in (0, 1] and never overflow.
-    let mut weights = Vec::with_capacity(samples.len());
-    let mut sum = 0.0;
-    for &s in samples {
-        let w = if s.is_finite() { ((s - max_sample) / temperature).exp() } else { 0.0 };
-        sum += w;
-        weights.push(w);
-    }
-
-    if sum <= f64::EPSILON {
-        // Degenerate case: fall back to argmax tie-break.
-        return random_argmax(samples.iter().copied(), random).unwrap_or(0);
-    }
-
-    let mut r = random.uniform_real(0.0, sum);
-    for (idx, w) in weights.iter().enumerate() {
-        r -= *w;
-        if r <= 0.0 {
-            return idx;
-        }
-    }
-
-    // Numerical edge: return last.
-    samples.len() - 1
-}
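
With the temperature-based selection deleted, the surviving policy is argmax over posterior samples with a uniform tie-break. A self-contained sketch of that behavior (the helper name is assumed, and a closure stands in for the crate's Random trait):

    // Returns the index of the maximum sample, breaking ties via `pick`;
    // returns None only for empty input, mirroring random_argmax's contract.
    fn argmax_random_tie(samples: &[f64], pick: &mut impl FnMut(usize) -> usize) -> Option<usize> {
        let max = samples.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let ties: Vec<usize> =
            samples.iter().enumerate().filter(|(_, &s)| s == max).map(|(idx, _)| idx).collect();
        if ties.is_empty() { None } else { Some(ties[pick(ties.len()) % ties.len()]) }
    }

    fn main() {
        // Deterministic stand-in "rng" that always picks the first candidate.
        let mut first = |_n: usize| 0;
        assert_eq!(argmax_random_tie(&[0.3, 0.9, 0.9], &mut first), Some(1)); // tie at 1 and 2
        assert_eq!(argmax_random_tie(&[], &mut first), None); // empty input, no panic
    }

Pure Thompson sampling already injects randomness through the posterior draws themselves, so the Boltzmann layer on top was redundant, which is why the softmax helper could go as dead code.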