From a4bc5ee55d56a19ac834d64247e53e23fde10d82 Mon Sep 17 00:00:00 2001
From: Nick Robinson <npr251@gmail.com>
Date: Sat, 11 Nov 2023 21:44:06 +0000
Subject: [PATCH 01/22] Trigger a profile before terminating worker upon
 timeout

---
 src/ReTestItems.jl |  3 ++-
 src/workers.jl     | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index b36bfbbe..d6139421 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -535,7 +535,8 @@ function manage_worker(
             # Handle the exception
             if e isa TimeoutException
                 @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker"
-                terminate!(worker)
+                trigger_profile(worker, :timeout)
+                terminate!(worker, :timeout)
                 wait(worker)
                 @error "$worker timed out running test item $(repr(testitem.name)) after $timeout seconds. \
                     Recording test error."
diff --git a/src/workers.jl b/src/workers.jl
index 93ccde5b..ccfd6bf3 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -3,6 +3,7 @@ module Workers
 using Sockets, Serialization
 
 export Worker, remote_eval, remote_fetch, terminate!, WorkerTerminatedException
+export trigger_profile
 
 function try_with_timeout(f, timeout)
     cond = Threads.Condition()
@@ -104,6 +105,22 @@ function watch_and_terminate!(w::Worker)
     true
 end
 
+# Send signal to the given `Worker` process to trigger a profile.
+# Users can customise this profiling in the usual way, e.g. via
+# `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`, `Profile.set_peek_duration`, `Profile.peek_report[]`
+# See https://docs.julialang.org/en/v1/stdlib/Profile/#Triggered-During-Execution
+function trigger_profile(w::Worker, from::Symbol=:manual)
+    if !Sys.iswindows()
+        @debug "sending profile request to worker $(w.pid) from $from"
+        if Sys.islinux()
+            kill(w.process, 10)  # SIGUSR1
+        elseif Sys.isbsd()
+            kill(w.process, 29)  # SIGINFO
+        end
+    end
+    return nothing
+end
+
 # gracefully terminate a worker by sending a shutdown message
 # and waiting for the other tasks to perform worker shutdown
 function Base.close(w::Worker)

From 525c176c689c310d6fe1cc18c598949127b3ac5c Mon Sep 17 00:00:00 2001
From: Nathan Daly <NHDaly@gmail.com>
Date: Thu, 18 Apr 2024 08:49:10 -0600
Subject: [PATCH 02/22] sleep after profile

---
 src/workers.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/workers.jl b/src/workers.jl
index ccfd6bf3..4fc4ab3a 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -118,6 +118,7 @@ function trigger_profile(w::Worker, from::Symbol=:manual)
             kill(w.process, 29)  # SIGINFO
         end
     end
+    sleep(120) # Leave time for it to print the profile.
     return nothing
 end
 

From e8e906faf6801f14fb8023060bef82f3ddc0b8c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 22 Apr 2024 15:29:45 +0200
Subject: [PATCH 03/22] Introduce `timeout_profile_wait` to control
 timeout-triggered profiles + tests

---
 Project.toml             |  4 ++-
 src/ReTestItems.jl       | 24 +++++++++-----
 src/workers.jl           |  8 +++--
 test/integrationtests.jl | 67 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/Project.toml b/Project.toml
index 03f7e22d..443df93b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -17,6 +17,7 @@ Dates = "1"
 Logging = "1"
 LoggingExtras = "1"
 Pkg = "1"
+Profile = "1"
 Random = "1"
 Serialization = "1"
 Sockets = "1"
@@ -30,9 +31,10 @@ DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6"
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"
 
 [targets]
-test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Random", "Test", "XMLDict"]
+test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Profile", "Random", "Test", "XMLDict"]
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index a33bcafb..56563a55 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -192,6 +192,11 @@ will be run.
   `paths` passed to it cannot contain test files, either because the path doesn't exist or
   the path points to a file which is not a test file. Default is `false`.
   Can also be set using the `RETESTITEMS_VALIDATE_PATHS` environment variable.
+- `timeout_profile_wait::Real=0`: When non-zero, a worker that times-out will trigger a CPU profile
+   for which we will wait `timeout_profile_wait` seconds before terminating the worker.
+   Zero means no profile will be taken. Can also be set using the `RETESTITEMS_TIMEOUT_PROFILE_WAIT`
+   environment variable. See the [Profile documentation](https://docs.julialang.org/en/v1/stdlib/Profile/#Triggered-During-Execution)
+   for more information on triggered profiles. Note you can use `worker_init_expr` to tweak the profile settings on workers.
 """
 function runtests end
 
@@ -237,6 +242,7 @@ function runtests(
     verbose_results::Bool=(logs !== :issues && isinteractive()),
     test_end_expr::Expr=Expr(:block),
     validate_paths::Bool=parse(Bool, get(ENV, "RETESTITEMS_VALIDATE_PATHS", "false")),
+    timeout_profile_wait::Real=parse(Int, get(ENV, "RETESTITEMS_TIMEOUT_PROFILE_WAIT", "0")),
 )
     nworker_threads = _validated_nworker_threads(nworker_threads)
     paths′ = _validated_paths(paths, validate_paths)
@@ -253,13 +259,14 @@ function runtests(
     nworkers = max(0, nworkers)
     retries = max(0, retries)
     timeout = ceil(Int, testitem_timeout)
+    timeout_profile_wait = ceil(Int, timeout_profile_wait)
     debuglvl = Int(debug)
     if debuglvl > 0
         LoggingExtras.withlevel(LoggingExtras.Debug; verbosity=debuglvl) do
-            _runtests(shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs)
+            _runtests(shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs, timeout_profile_wait)
         end
     else
-        return _runtests(shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs)
+        return _runtests(shouldrun_combined, paths′, nworkers, nworker_threads, worker_init_expr, test_end_expr, timeout, retries, memory_threshold, verbose_results, debuglvl, report, logs, timeout_profile_wait)
     end
 end
 
@@ -273,7 +280,7 @@ end
 # By tracking and reusing test environments, we can avoid this issue.
 const TEST_ENVS = Dict{String, String}()
 
-function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, worker_init_expr::Expr, test_end_expr::Expr, testitem_timeout::Int, retries::Int, memory_threshold::Real, verbose_results::Bool, debug::Int, report::Bool, logs::Symbol)
+function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, worker_init_expr::Expr, test_end_expr::Expr, testitem_timeout::Int, retries::Int, memory_threshold::Real, verbose_results::Bool, debug::Int, report::Bool, logs::Symbol, timeout_profile_wait::Int)
     # Don't recursively call `runtests` e.g. if we `include` a file which calls it.
     # So we ignore the `runtests(...)` call in `test/runtests.jl` when `runtests(...)`
     # was called from the command line.
@@ -293,7 +300,7 @@ function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, wor
         if is_running_test_runtests_jl(proj_file)
             # Assume this is `Pkg.test`, so test env already active.
             @debugv 2 "Running in current environment `$(Base.active_project())`"
-            return _runtests_in_current_env(shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs)
+            return _runtests_in_current_env(shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs, timeout_profile_wait)
         else
             @debugv 1 "Activating test environment for `$proj_file`"
             orig_proj = Base.active_project()
@@ -306,7 +313,7 @@ function _runtests(shouldrun, paths, nworkers::Int, nworker_threads::String, wor
                     testenv = TestEnv.activate()
                     TEST_ENVS[proj_file] = testenv
                 end
-                _runtests_in_current_env(shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs)
+                _runtests_in_current_env(shouldrun, paths, proj_file, nworkers, nworker_threads, worker_init_expr, test_end_expr, testitem_timeout, retries, memory_threshold, verbose_results, debug, report, logs, timeout_profile_wait)
             finally
                 Base.set_active_project(orig_proj)
             end
@@ -317,6 +324,7 @@ end
 function _runtests_in_current_env(
     shouldrun, paths, projectfile::String, nworkers::Int, nworker_threads, worker_init_expr::Expr, test_end_expr::Expr,
     testitem_timeout::Int, retries::Int, memory_threshold::Real, verbose_results::Bool, debug::Int, report::Bool, logs::Symbol,
+    timeout_profile_wait::Int,
 )
     start_time = time()
     proj_name = something(Pkg.Types.read_project(projectfile).name, "")
@@ -381,7 +389,7 @@ function _runtests_in_current_env(
                 ti = starting[i]
                 @spawn begin
                     with_logger(original_logger) do
-                        manage_worker($w, $proj_name, $testitems, $ti, $nworker_threads, $worker_init_expr, $test_end_expr, $testitem_timeout, $retries, $memory_threshold, $verbose_results, $debug, $report, $logs)
+                        manage_worker($w, $proj_name, $testitems, $ti, $nworker_threads, $worker_init_expr, $test_end_expr, $testitem_timeout, $retries, $memory_threshold, $verbose_results, $debug, $report, $logs, $timeout_profile_wait)
                     end
                 end
             end
@@ -490,7 +498,7 @@ end
 
 function manage_worker(
     worker::Worker, proj_name::AbstractString, testitems::TestItems, testitem::Union{TestItem,Nothing}, nworker_threads, worker_init_expr::Expr, test_end_expr::Expr,
-    default_timeout::Int, retries::Int, memory_threshold::Real, verbose_results::Bool, debug::Int, report::Bool, logs::Symbol
+    default_timeout::Int, retries::Int, memory_threshold::Real, verbose_results::Bool, debug::Int, report::Bool, logs::Symbol, timeout_profile_wait::Int
 )
     ntestitems = length(testitems.testitems)
     run_number = 1
@@ -554,7 +562,7 @@ function manage_worker(
             # Handle the exception
             if e isa TimeoutException
                 @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker"
-                trigger_profile(worker, :timeout)
+                timeout_profile_wait > 0 && trigger_profile(worker, timeout_profile_wait, :timeout)
                 terminate!(worker, :timeout)
                 wait(worker)
                 @error "$worker timed out running test item $(repr(testitem.name)) after $timeout seconds. \
diff --git a/src/workers.jl b/src/workers.jl
index 0a971609..7ef1b9dc 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -108,9 +108,11 @@ end
 
 # Send signal to the given `Worker` process to trigger a profile.
 # Users can customise this profiling in the usual way, e.g. via
-# `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`, `Profile.set_peek_duration`, `Profile.peek_report[]`
+# `JULIA_PROFILE_PEEK_HEAP_SNAPSHOT`, but `Profile.set_peek_duration`, `Profile.peek_report[]`
+# would have to be modified in the worker process.
 # See https://docs.julialang.org/en/v1/stdlib/Profile/#Triggered-During-Execution
-function trigger_profile(w::Worker, from::Symbol=:manual)
+# Called when timeout_profile_wait is non-zero.
+function trigger_profile(w::Worker, timeout_profile_wait, from::Symbol=:manual)
     if !Sys.iswindows()
         @debug "sending profile request to worker $(w.pid) from $from"
         if Sys.islinux()
@@ -119,7 +121,7 @@ function trigger_profile(w::Worker, from::Symbol=:manual)
             kill(w.process, 29)  # SIGINFO
         end
     end
-    sleep(120) # Leave time for it to print the profile.
+    sleep(timeout_profile_wait) # Leave time for it to print the profile.
     return nothing
 end
 
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 42374825..bb628f76 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -819,6 +819,73 @@ end
     @test ts.time_end - ts.time_start ≈ timeout
 end
 
+@testset "CPU profile timeout trigger" begin
+    using Profile
+
+    function capture_timeout_profile(f, timeout_profile_wait; kwargs...)
+        logs = mktemp() do path, io
+            redirect_stdio(stdout=io, stderr=io, stdin=devnull) do
+                encased_testset() do
+                    if isnothing(timeout_profile_wait)
+                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=2, kwargs...)
+                    else
+                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=2, timeout_profile_wait, kwargs...)
+                    end
+                end
+            end
+            flush(io)
+            close(io)
+            read(path, String)
+        end
+        f(logs)
+        return nothing
+    end
+
+    # No profile is collected when timeout_profile_wait is zero.
+    capture_timeout_profile(0) do logs
+        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+        @test !occursin("Information request received", logs)
+    end
+
+    # Profile is collected when timeout_profile_wait is non-zero.
+    default_peektime = Profile.get_peek_duration()
+    capture_timeout_profile(5) do logs
+        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+        @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime) second profile", logs)
+        @test occursin("Profile collected.", logs)
+    end
+
+    # Profile is collected with and set_peek_duration is respected.
+    capture_timeout_profile(5, worker_init_expr=:(using Profile; Profile.set_peek_duration($default_peektime + 1.0))) do logs
+        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+        @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime + 1.0) second profile", logs)
+        @test occursin("Profile collected.", logs)
+    end
+
+
+    # The RETESTITEMS_TIMEOUT_PROFILE_WAIT environment variable can be used to set the timeout_profile_wait.
+    withenv("RETESTITEMS_TIMEOUT_PROFILE_WAIT" => "5") do
+        capture_timeout_profile(nothing) do logs
+            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+            @test occursin("Information request received", logs)
+            @test occursin("Profile collected.", logs)
+        end
+    end
+
+    # The profile is collected for each worker thread.
+    capture_timeout_profile(5, nworker_threads="3,2") do logs
+        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+        @test occursin("Information request received", logs)
+        @test occursin(r"Worker \d+:  Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Worker \d+:  Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Worker \d+:  Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Worker \d+:  Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Worker \d+:  Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test !occursin(r"Worker \d+:  Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin("Profile collected.", logs)
+    end
+end
+
 @testset "worker always crashes immediately" begin
     file = joinpath(TEST_FILES_DIR, "_happy_tests.jl")
 

From 2679113045452a7e227e857ab36b58241c3a51b2 Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Mon, 22 Apr 2024 17:48:02 +0200
Subject: [PATCH 04/22] Print captured logs after we finish writing the profile

---
 src/ReTestItems.jl       |  8 +++++++-
 test/integrationtests.jl | 16 +++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 56563a55..361e3df1 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -558,23 +558,29 @@ function manage_worker(
         catch e
             @debugv 2 "Error" exception=e
             println(DEFAULT_STDOUT[])
-            _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
             # Handle the exception
             if e isa TimeoutException
                 @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker"
                 timeout_profile_wait > 0 && trigger_profile(worker, timeout_profile_wait, :timeout)
                 terminate!(worker, :timeout)
                 wait(worker)
+                # TODO: We print the captured logs after the worker is terminated,
+                # which means that we include an annoying stackrace from the worker termination,
+                # but the profiles don't seem to get flushed properly if we don't do this.
+                # This is not an issue with eager logs, but when going through a file, this seems to help.
+                _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 @error "$worker timed out running test item $(repr(testitem.name)) after $timeout seconds. \
                     Recording test error."
                 record_timeout!(testitem, run_number, timeout)
             elseif e isa WorkerTerminatedException
+                _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 @error "$worker died running test item $(repr(testitem.name)). \
                     Recording test error."
                 record_worker_terminated!(testitem, worker, run_number)
             else
                 # We don't expect any other kind of error, so rethrow, which will propagate
                 # back up to the main coordinator task and throw to the user
+                _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 rethrow()
             end
             # Handle retries
diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index bb628f76..e36c6723 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -873,16 +873,18 @@ end
     end
 
     # The profile is collected for each worker thread.
-    capture_timeout_profile(5, nworker_threads="3,2") do logs
+    for log_capture in (:eager, :batched)
+        capture_timeout_profile(5, nworker_threads="3,2", logs=log_capture) do logs
         @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
         @test occursin("Information request received", logs)
-        @test occursin(r"Worker \d+:  Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Worker \d+:  Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Worker \d+:  Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Worker \d+:  Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Worker \d+:  Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test !occursin(r"Worker \d+:  Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test !occursin(r"Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
         @test occursin("Profile collected.", logs)
+        end
     end
 end
 

From 3071046875483eeb7b92c6138866a563a9ea6f31 Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Mon, 22 Apr 2024 18:29:51 +0200
Subject: [PATCH 05/22] Try forcing a flush on worker exit

---
 src/workers.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/workers.jl b/src/workers.jl
index 7ef1b9dc..7c172909 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -295,6 +295,7 @@ function startworker()
     # don't need stdin (copied from Distributed.start_worker)
     redirect_stdin(devnull)
     close(stdin)
+    atexit(() -> (flush(stdout); flush(stderr)))
     redirect_stderr(stdout) # redirect stderr so coordinator reads everything from stdout
     port, sock = listenany(UInt16(rand(10000:50000)))
     # send the port to the coordinator

From 251445112e3eb1fe5301b7e4c9ff906207e9b6c3 Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 13:46:38 +0200
Subject: [PATCH 06/22] Test stacktraces are printed instead of testing the
 profile (which was too flaky)

---
 test/integrationtests.jl | 46 +++++++++++++++++++++++-----------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index e36c6723..efed7837 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -821,15 +821,16 @@ end
 
 @testset "CPU profile timeout trigger" begin
     using Profile
-
+    # We're only testing that the signal was registered and that the stacktrace was printed.
+    # We also tried testing that the CPU profile was displayed, but that was too flaky in CI.
     function capture_timeout_profile(f, timeout_profile_wait; kwargs...)
         logs = mktemp() do path, io
             redirect_stdio(stdout=io, stderr=io, stdin=devnull) do
                 encased_testset() do
                     if isnothing(timeout_profile_wait)
-                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=2, kwargs...)
+                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=3, kwargs...)
                     else
-                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=2, timeout_profile_wait, kwargs...)
+                        runtests(joinpath(TEST_FILES_DIR, "_timeout_tests.jl"); nworkers=1, testitem_timeout=3, timeout_profile_wait, kwargs...)
                     end
                 end
             end
@@ -838,51 +839,56 @@ end
             read(path, String)
         end
         f(logs)
-        return nothing
+        return logs
     end
 
-    # No profile is collected when timeout_profile_wait is zero.
+    @testset "timeout_profile_wait=0 means no CPU profile" begin
     capture_timeout_profile(0) do logs
-        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test !occursin("Information request received", logs)
+        end
     end
 
-    # Profile is collected when timeout_profile_wait is non-zero.
+
     default_peektime = Profile.get_peek_duration()
+    @testset "non-zero timeout_profile_wait means we collect a CPU profile" begin
     capture_timeout_profile(5) do logs
-        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime) second profile", logs)
+            @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)
+        end
     end
 
-    # Profile is collected with and set_peek_duration is respected.
+
+    @testset "`set_peek_duration` is respected in `worker_init_expr`" begin
     capture_timeout_profile(5, worker_init_expr=:(using Profile; Profile.set_peek_duration($default_peektime + 1.0))) do logs
-        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime + 1.0) second profile", logs)
+            @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)
+        end
     end
 
 
     # The RETESTITEMS_TIMEOUT_PROFILE_WAIT environment variable can be used to set the timeout_profile_wait.
+    @testset "RETESTITEMS_TIMEOUT_PROFILE_WAIT environment variable" begin
     withenv("RETESTITEMS_TIMEOUT_PROFILE_WAIT" => "5") do
         capture_timeout_profile(nothing) do logs
-            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+                @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
             @test occursin("Information request received", logs)
+                @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
             @test occursin("Profile collected.", logs)
+            end
         end
     end
 
     # The profile is collected for each worker thread.
-    for log_capture in (:eager, :batched)
-        capture_timeout_profile(5, nworker_threads="3,2", logs=log_capture) do logs
-        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 2 seconds", logs)
+    @testset "CPU profile with $(repr(log_capture)) logs" for log_capture in (:eager, :batched)
+        logs = capture_timeout_profile(5, nworker_threads="3,2", logs=log_capture) do logs
+            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test occursin("Information request received", logs)
-        @test occursin(r"Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test occursin(r"Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        @test !occursin(r"Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+            @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)
         end
     end

From fcb2b850434801f9263394550e189da4d8684554 Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 14:22:42 +0200
Subject: [PATCH 07/22] 1.8 doesn't have interactive threads

---
 test/integrationtests.jl | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index efed7837..9bae7dc9 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -839,12 +839,12 @@ end
             read(path, String)
         end
         f(logs)
+        @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         return logs
     end
 
     @testset "timeout_profile_wait=0 means no CPU profile" begin
     capture_timeout_profile(0) do logs
-            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test !occursin("Information request received", logs)
         end
     end
@@ -853,7 +853,6 @@ end
     default_peektime = Profile.get_peek_duration()
     @testset "non-zero timeout_profile_wait means we collect a CPU profile" begin
     capture_timeout_profile(5) do logs
-            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime) second profile", logs)
             @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)
@@ -863,7 +862,6 @@ end
 
     @testset "`set_peek_duration` is respected in `worker_init_expr`" begin
     capture_timeout_profile(5, worker_init_expr=:(using Profile; Profile.set_peek_duration($default_peektime + 1.0))) do logs
-            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
         @test occursin("Information request received. A stacktrace will print followed by a $(default_peektime + 1.0) second profile", logs)
             @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)
@@ -875,7 +873,6 @@ end
     @testset "RETESTITEMS_TIMEOUT_PROFILE_WAIT environment variable" begin
     withenv("RETESTITEMS_TIMEOUT_PROFILE_WAIT" => "5") do
         capture_timeout_profile(nothing) do logs
-                @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
             @test occursin("Information request received", logs)
                 @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
             @test occursin("Profile collected.", logs)
@@ -884,9 +881,8 @@ end
     end
 
     # The profile is collected for each worker thread.
-    @testset "CPU profile with $(repr(log_capture)) logs" for log_capture in (:eager, :batched)
-        logs = capture_timeout_profile(5, nworker_threads="3,2", logs=log_capture) do logs
-            @assert occursin("timed out running test item \"Test item takes 60 seconds\" after 3 seconds", logs)
+    @testset "CPU profile with $(repr(log_capture))" for log_capture in (:eager, :batched)
+        capture_timeout_profile(5, nworker_threads=VERSION >= v"1.9" ? "3,2" : "3", logs=log_capture) do logs
         @test occursin("Information request received", logs)
             @test count(r"pthread_cond_wait|__psych_cvwait", logs) > 0 # the stacktrace was printed (will fail on Windows)
         @test occursin("Profile collected.", logs)

From 3407e7e73265206654ba06d08ab1fae351b4a9bd Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 14:30:18 +0200
Subject: [PATCH 08/22] .

---
 src/workers.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/workers.jl b/src/workers.jl
index 7c172909..7ef1b9dc 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -295,7 +295,6 @@ function startworker()
     # don't need stdin (copied from Distributed.start_worker)
     redirect_stdin(devnull)
     close(stdin)
-    atexit(() -> (flush(stdout); flush(stderr)))
     redirect_stderr(stdout) # redirect stderr so coordinator reads everything from stdout
     port, sock = listenany(UInt16(rand(10000:50000)))
     # send the port to the coordinator

From 83ecc8ccd671b72f9e66716eaf5bf5d850ab5bec Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 14:48:57 +0200
Subject: [PATCH 09/22] Add unit test

---
 test/integrationtests.jl |  2 +-
 test/workers.jl          | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index 9bae7dc9..d18e005c 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -822,7 +822,7 @@ end
 @testset "CPU profile timeout trigger" begin
     using Profile
     # We're only testing that the signal was registered and that the stacktrace was printed.
-    # We also tried testing that the CPU profile was displayed, but that was too flaky in CI.
+    # We also tried testing that the CPU profile was displayed here, but that was too flaky in CI.
     function capture_timeout_profile(f, timeout_profile_wait; kwargs...)
         logs = mktemp() do path, io
             redirect_stdio(stdout=io, stderr=io, stdin=devnull) do
diff --git a/test/workers.jl b/test/workers.jl
index 40b0e802..4e69fee3 100644
--- a/test/workers.jl
+++ b/test/workers.jl
@@ -93,4 +93,28 @@ using Test
         close(w)
     end
 
+    @testset "CPU profile" begin
+        logs = mktemp() do path, io
+            w = Worker(threads=VERSION > v"1.9" ? "3,2" : "3", worker_redirect_io=io)
+            fut = remote_eval(w, :(sleep(5), yield()))
+            sleep(0.5)
+            trigger_profile(w, 1, :test)
+            fetch(fut)
+            close(w)
+            flush(io)
+            close(io)
+            return read(path, String)
+        end
+
+        @test occursin(r"Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        @test occursin(r"Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        if VERSION > v"1.9"
+            @test occursin(r"Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+            @test occursin(r"Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+            @test !occursin(r"Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        else
+            @test !occursin(r"Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
+        end
+    end
 end # workers.jl testset

From 6827599e416ce6c47b8bb1183d0fe05420a8485f Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 14:50:28 +0200
Subject: [PATCH 10/22] .

---
 test/workers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/workers.jl b/test/workers.jl
index 4e69fee3..65dbd243 100644
--- a/test/workers.jl
+++ b/test/workers.jl
@@ -95,7 +95,7 @@ using Test
 
     @testset "CPU profile" begin
         logs = mktemp() do path, io
-            w = Worker(threads=VERSION > v"1.9" ? "3,2" : "3", worker_redirect_io=io)
+            w = Worker(threads=VERSION >= v"1.9" ? "3,2" : "3", worker_redirect_io=io)
             fut = remote_eval(w, :(sleep(5), yield()))
             sleep(0.5)
             trigger_profile(w, 1, :test)

From 864d6cfa6bb722cc6f99c9ed5e9f9fc9bd6b06f4 Mon Sep 17 00:00:00 2001
From: Drvi <tomas.drvostep@gmail.com>
Date: Sat, 27 Apr 2024 14:55:39 +0200
Subject: [PATCH 11/22] .

---
 test/workers.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/workers.jl b/test/workers.jl
index 65dbd243..6eece79a 100644
--- a/test/workers.jl
+++ b/test/workers.jl
@@ -109,7 +109,7 @@ using Test
         @test occursin(r"Thread 1 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
         @test occursin(r"Thread 2 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
         @test occursin(r"Thread 3 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
-        if VERSION > v"1.9"
+        if VERSION >= v"1.9"
             @test occursin(r"Thread 4 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
             @test occursin(r"Thread 5 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)
             @test !occursin(r"Thread 6 Task 0x\w+ Total snapshots: \d+. Utilization: \d+%", logs)

From d1dc637e575a26d5f3656f9cfcd2b71659f78cc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 14:22:58 +0200
Subject: [PATCH 12/22] Print newline just before the logs

---
 src/ReTestItems.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 97329962..ce052cc8 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -559,10 +559,9 @@ function manage_worker(
             end
         catch e
             @debugv 2 "Error" exception=e
-            println(DEFAULT_STDOUT[])
             # Handle the exception
             if e isa TimeoutException
-                @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker"
+                @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker $(timeout_profile_wait > 0 ? "and triggering profile" : "")."
                 timeout_profile_wait > 0 && trigger_profile(worker, timeout_profile_wait, :timeout)
                 terminate!(worker, :timeout)
                 wait(worker)
@@ -570,11 +569,13 @@ function manage_worker(
                 # which means that we include an annoying stackrace from the worker termination,
                 # but the profiles don't seem to get flushed properly if we don't do this.
                 # This is not an issue with eager logs, but when going through a file, this seems to help.
+                println(DEFAULT_STDOUT[])
                 _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 @error "$worker timed out running test item $(repr(testitem.name)) after $timeout seconds. \
                     Recording test error."
                 record_timeout!(testitem, run_number, timeout)
             elseif e isa WorkerTerminatedException
+                println(DEFAULT_STDOUT[])
                 _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 @error "$worker died running test item $(repr(testitem.name)). \
                     Recording test error."
@@ -582,6 +583,7 @@ function manage_worker(
             else
                 # We don't expect any other kind of error, so rethrow, which will propagate
                 # back up to the main coordinator task and throw to the user
+                println(DEFAULT_STDOUT[])
                 _print_captured_logs(DEFAULT_STDOUT[], testitem, run_number)
                 rethrow()
             end

From 931bde42c1165535d8cfbcaf659aad1d7e6a5bda Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 14:49:15 +0200
Subject: [PATCH 13/22] Experiment: custom `peek_report`

---
 Project.toml       |  4 ++--
 src/ReTestItems.jl | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 6881085a..3676e463 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,6 +7,7 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -31,10 +32,9 @@ DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6"
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"
 
 [targets]
-test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Profile", "Random", "Test", "XMLDict"]
+test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Random", "Test", "XMLDict"]
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index ce052cc8..751544f5 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -416,6 +416,19 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
     # remote_fetch here because we want to make sure the worker is all setup before starting to eval testitems
     remote_fetch(w, quote
         using ReTestItems, Test
+        let
+            import Profile: peek_report
+            # Like https://github.com/JuliaLang/julia/blob/6023ad6718514c15b3297197757ae3d93b85270b/stdlib/Profile/src/Profile.jl#L65-L70
+            # but doesn't limit the IOContext to the default display size
+            function _peek_report()
+                iob = IOBuffer()
+                println(iob, "Worker$i: CPU profile")
+                ioc = IOContext(iob, stdout)
+                print(ioc, groupby = [:thread, :task])
+                Base.print(stdout, String(resize!(iob.data, iob.size)))
+            end
+            peek_report[] = _peek_report
+        end
         Test.TESTSET_PRINT_ENABLE[] = false
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)
         GLOBAL_TEST_CONTEXT.setups_evaled = ReTestItems.TestSetupModules()

From 699d37ff6691b5da627b42f5fa5b7dd63bde38e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 15:05:56 +0200
Subject: [PATCH 14/22] .

---
 src/ReTestItems.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 751544f5..3237bb45 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -5,6 +5,7 @@ using Dates: DateTime, ISODateTimeFormat, format, now, unix2datetime
 using Test: Test, DefaultTestSet, TestSetException
 using .Threads: @spawn, nthreads
 using Pkg: Pkg
+using Profile
 using TestEnv
 using Logging
 using LoggingExtras
@@ -417,7 +418,6 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
     remote_fetch(w, quote
         using ReTestItems, Test
         let
-            import Profile: peek_report
             # Like https://github.com/JuliaLang/julia/blob/6023ad6718514c15b3297197757ae3d93b85270b/stdlib/Profile/src/Profile.jl#L65-L70
             # but doesn't limit the IOContext to the default display size
             function _peek_report()
@@ -427,7 +427,7 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
                 print(ioc, groupby = [:thread, :task])
                 Base.print(stdout, String(resize!(iob.data, iob.size)))
             end
-            peek_report[] = _peek_report
+            ReTestItems.Profile.peek_report[] = _peek_report
         end
         Test.TESTSET_PRINT_ENABLE[] = false
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)

From 943ded452d38a5ca77f4aaf37021a9a5a209aed5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 15:12:46 +0200
Subject: [PATCH 15/22] .

---
 src/ReTestItems.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 3237bb45..b4512645 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -420,6 +420,7 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
         let
             # Like https://github.com/JuliaLang/julia/blob/6023ad6718514c15b3297197757ae3d93b85270b/stdlib/Profile/src/Profile.jl#L65-L70
             # but doesn't limit the IOContext to the default display size
+            Profile = Base.require(@__MODULE__, :Profile)
             function _peek_report()
                 iob = IOBuffer()
                 println(iob, "Worker$i: CPU profile")
@@ -427,7 +428,7 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
                 print(ioc, groupby = [:thread, :task])
                 Base.print(stdout, String(resize!(iob.data, iob.size)))
             end
-            ReTestItems.Profile.peek_report[] = _peek_report
+            peek_report[] = _peek_report
         end
         Test.TESTSET_PRINT_ENABLE[] = false
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)

From c72cbeab3794bbce168148719ca1113486eacbd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 15:14:04 +0200
Subject: [PATCH 16/22] .

---
 src/ReTestItems.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index b4512645..bea9a81a 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -428,7 +428,7 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
                 print(ioc, groupby = [:thread, :task])
                 Base.print(stdout, String(resize!(iob.data, iob.size)))
             end
-            peek_report[] = _peek_report
+            Profile.peek_report[] = _peek_report
         end
         Test.TESTSET_PRINT_ENABLE[] = false
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)

From 52dbcce2e2f7ff362a39343eda661e2db4df2af9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 29 Apr 2024 15:44:06 +0200
Subject: [PATCH 17/22] Well, that didn't work, just increase the displaysize

---
 Project.toml       |  4 ++--
 src/ReTestItems.jl | 14 --------------
 src/log_capture.jl |  4 +++-
 3 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/Project.toml b/Project.toml
index 3676e463..6881085a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -7,7 +7,6 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
-Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -32,9 +31,10 @@ DeepDiffs = "ab62b9b5-e342-54a8-a765-a90f495de1a6"
 IOCapture = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 XMLDict = "228000da-037f-5747-90a9-8195ccbf91a5"
 
 [targets]
-test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Random", "Test", "XMLDict"]
+test = ["AutoHashEquals", "DeepDiffs", "IOCapture", "Logging", "Pkg", "Profile", "Random", "Test", "XMLDict"]
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index bea9a81a..ce052cc8 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -5,7 +5,6 @@ using Dates: DateTime, ISODateTimeFormat, format, now, unix2datetime
 using Test: Test, DefaultTestSet, TestSetException
 using .Threads: @spawn, nthreads
 using Pkg: Pkg
-using Profile
 using TestEnv
 using Logging
 using LoggingExtras
@@ -417,19 +416,6 @@ function start_worker(proj_name, nworker_threads, worker_init_expr, ntestitems;
     # remote_fetch here because we want to make sure the worker is all setup before starting to eval testitems
     remote_fetch(w, quote
         using ReTestItems, Test
-        let
-            # Like https://github.com/JuliaLang/julia/blob/6023ad6718514c15b3297197757ae3d93b85270b/stdlib/Profile/src/Profile.jl#L65-L70
-            # but doesn't limit the IOContext to the default display size
-            Profile = Base.require(@__MODULE__, :Profile)
-            function _peek_report()
-                iob = IOBuffer()
-                println(iob, "Worker$i: CPU profile")
-                ioc = IOContext(iob, stdout)
-                print(ioc, groupby = [:thread, :task])
-                Base.print(stdout, String(resize!(iob.data, iob.size)))
-            end
-            Profile.peek_report[] = _peek_report
-        end
         Test.TESTSET_PRINT_ENABLE[] = false
         const GLOBAL_TEST_CONTEXT = ReTestItems.TestContext($proj_name, $ntestitems)
         GLOBAL_TEST_CONTEXT.setups_evaled = ReTestItems.TestSetupModules()
diff --git a/src/log_capture.jl b/src/log_capture.jl
index 968cee70..6d36ac9d 100644
--- a/src/log_capture.jl
+++ b/src/log_capture.jl
@@ -107,7 +107,9 @@ If target is String it is assumed it is a file path.
 _redirect_logs(f, path::String) = open(io->_redirect_logs(f, io), path, "w")
 function _redirect_logs(f, target::IO)
     target === DEFAULT_STDOUT[] && return f()
-    colored_io = IOContext(target, :color => get(DEFAULT_STDOUT[], :color, false))
+    # If we're not doing :eager logs, make sure the displaysize is large so we don't truncate
+    # CPU profiles.
+    colored_io = IOContext(target, :color => get(DEFAULT_STDOUT[], :color, false), :displaysize => (10000,10000))
     # In case the default logger was changed by the user, we need to make sure the new logstate
     # is poinitng to the new stderr.
     # Adapted from https://github.com/JuliaIO/Suppressor.jl/blob/cbfc46f1450b03d6b69dad4c35de739290ff0aff/src/Suppressor.jl#L158-L161

From 1a29db0757f36d17b904d3e96d9266e77fb51314 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Tue, 30 Apr 2024 12:21:10 +0200
Subject: [PATCH 18/22] Give the user a heads up about the CPU profile being
 collected

---
 src/ReTestItems.jl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index ce052cc8..feb9b821 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -561,8 +561,11 @@ function manage_worker(
             @debugv 2 "Error" exception=e
             # Handle the exception
             if e isa TimeoutException
-                @debugv 1 "Test item $(repr(testitem.name)) timed out. Terminating worker $worker $(timeout_profile_wait > 0 ? "and triggering profile" : "")."
-                timeout_profile_wait > 0 && trigger_profile(worker, timeout_profile_wait, :timeout)
+                if timeout_profile_wait > 0
+                    @warn "$worker timed out running test item $(repr(testitem.name)) after $timeout seconds. \
+                        A CPU profile will be triggered on the worker and then it will be terminated."
+                    trigger_profile(worker, timeout_profile_wait, :timeout)
+                end
                 terminate!(worker, :timeout)
                 wait(worker)
                 # TODO: We print the captured logs after the worker is terminated,

From d42ae31a2456a3beb1c1cdee35957b65701081bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Tue, 30 Apr 2024 14:06:49 +0200
Subject: [PATCH 19/22] v1.24.0

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 6881085a..9cc91392 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReTestItems"
 uuid = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
-version = "1.23.3"
+version = "1.24.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

From 43df1a8483cc6b887c06daacdb72c7c07c0c9a02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 6 May 2024 17:25:58 +0200
Subject: [PATCH 20/22] PR feedback

---
 Project.toml       | 2 +-
 src/ReTestItems.jl | 4 +++-
 src/workers.jl     | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/Project.toml b/Project.toml
index 9cc91392..6881085a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "ReTestItems"
 uuid = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
-version = "1.24.0"
+version = "1.23.3"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index feb9b821..070790f2 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -250,7 +250,8 @@ function runtests(
     logs in LOG_DISPLAY_MODES || throw(ArgumentError("`logs` must be one of $LOG_DISPLAY_MODES, got $(repr(logs))"))
     report && logs == :eager && throw(ArgumentError("`report=true` is not compatible with `logs=:eager`"))
     (0 ≤ memory_threshold ≤ 1) || throw(ArgumentError("`memory_threshold` must be between 0 and 1, got $(repr(memory_threshold))"))
-    testitem_timeout > 0 || throw(ArgumentError("`testitem_timeout` must be a postive number, got $(repr(testitem_timeout))"))
+    testitem_timeout > 0 || throw(ArgumentError("`testitem_timeout` must be a positive number, got $(repr(testitem_timeout))"))
+    timeout_profile_wait > 0 || throw(ArgumentError("`timeout_profile_wait` must be a positive number, got $(repr(timeout_profile_wait))"))
     # If we were given paths but none were valid, then nothing to run.
     !isempty(paths) && isempty(paths′) && return nothing
     shouldrun_combined(ti) = shouldrun(ti) && _shouldrun(name, ti.name) && _shouldrun(tags, ti.tags)
@@ -260,6 +261,7 @@ function runtests(
     retries = max(0, retries)
     timeout = ceil(Int, testitem_timeout)
     timeout_profile_wait = ceil(Int, timeout_profile_wait)
+    (timeout_profile_wait > 0 && Sys.iswindows()) && @warn "CPU profiles on timeout is not supported on Windows, ignoring `timeout_profile_wait`"
     debuglvl = Int(debug)
     if debuglvl > 0
         LoggingExtras.withlevel(LoggingExtras.Debug; verbosity=debuglvl) do
diff --git a/src/workers.jl b/src/workers.jl
index 7ef1b9dc..ebaac6f3 100644
--- a/src/workers.jl
+++ b/src/workers.jl
@@ -120,8 +120,8 @@ function trigger_profile(w::Worker, timeout_profile_wait, from::Symbol=:manual)
         elseif Sys.isbsd()
             kill(w.process, 29)  # SIGINFO
         end
+        sleep(timeout_profile_wait) # Leave time for it to print the profile.
     end
-    sleep(timeout_profile_wait) # Leave time for it to print the profile.
     return nothing
 end
 

From 9d4f1b97a580f25d7974ed3e8de1fc841929da79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 6 May 2024 17:28:43 +0200
Subject: [PATCH 21/22] .

---
 src/ReTestItems.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ReTestItems.jl b/src/ReTestItems.jl
index 070790f2..878e9934 100644
--- a/src/ReTestItems.jl
+++ b/src/ReTestItems.jl
@@ -251,7 +251,7 @@ function runtests(
     report && logs == :eager && throw(ArgumentError("`report=true` is not compatible with `logs=:eager`"))
     (0 ≤ memory_threshold ≤ 1) || throw(ArgumentError("`memory_threshold` must be between 0 and 1, got $(repr(memory_threshold))"))
     testitem_timeout > 0 || throw(ArgumentError("`testitem_timeout` must be a positive number, got $(repr(testitem_timeout))"))
-    timeout_profile_wait > 0 || throw(ArgumentError("`timeout_profile_wait` must be a positive number, got $(repr(timeout_profile_wait))"))
+    timeout_profile_wait >= 0 || throw(ArgumentError("`timeout_profile_wait` must be a non-negative number, got $(repr(timeout_profile_wait))"))
     # If we were given paths but none were valid, then nothing to run.
     !isempty(paths) && isempty(paths′) && return nothing
     shouldrun_combined(ti) = shouldrun(ti) && _shouldrun(name, ti.name) && _shouldrun(tags, ti.tags)

From 9c72a0d7d45e677be46584090f060ee26c90f59c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Drvo=C5=A1t=C4=9Bp?=
 <tomas.drvostep@gmail.com>
Date: Mon, 6 May 2024 17:44:40 +0200
Subject: [PATCH 22/22] :)

---
 test/integrationtests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/integrationtests.jl b/test/integrationtests.jl
index d18e005c..20761260 100644
--- a/test/integrationtests.jl
+++ b/test/integrationtests.jl
@@ -683,7 +683,7 @@ end
     @test err.value == string(ErrorException("Timed out after 4s running test item \"Test item takes 60 seconds\" (run=1)"))
 
     for t in (0, -1.1)
-        expected = ArgumentError("`testitem_timeout` must be a postive number, got $t")
+        expected = ArgumentError("`testitem_timeout` must be a positive number, got $t")
         @test_throws expected runtests(file; nworkers, testitem_timeout=t)
     end
 end