From 989d4788e5e54bd0755eba26c12c8e6e2b723288 Mon Sep 17 00:00:00 2001 From: Rob Norris Date: Wed, 12 Nov 2025 17:27:33 +1100 Subject: [PATCH] taskq: deadman: log a message if a taskq has not made progress It is very difficult to debug situations where all threads on a taskq go to sleep waiting for some condition that will not be satisfied until some task queued on that taskq performs some task. It's usually easy to see that the threads are waiting, but less easy to see that unscheduled tasks exist. This adds a simple deadman logging function to each taskq. The first time a taskq thread picks up a task, it arms the timer to expire in spl_taskq_deadman_timeout seconds (default 20s). If another thread picks up a new task, the timer is rearmed. When the last active thread completes its task, the timer is disarmed. All together, this means the deadman will fire if no new tasks have started or existing tasks have completed within the configured time. As long as the taskq is making progress, everything will be silent. When it fires, it will log a notice to the kernel log: [ 28.715019] spl: taskq stuck for 20s: z_null_int.0 [1/1 threads active, 35 tasks queued] If it clears by itself, that is, this was a genuinely long-running task, a second message will be logged: [ 38.819171] spl: taskq resumed after 105s: z_null_iss.0 spl_taskq_deadman_timeout=0 will disable the facility entirely. Sponsored-by: Klara, Inc. Sponsored-by: Wasabi Technology, Inc. 
Signed-off-by: Rob Norris --- config/kernel-timer.m4 | 27 ++++++++++++- include/os/linux/spl/sys/taskq.h | 4 +- man/man4/spl.4 | 11 +++++- module/os/linux/spl/spl-taskq.c | 67 +++++++++++++++++++++++++++++++- 4 files changed, 103 insertions(+), 6 deletions(-) diff --git a/config/kernel-timer.m4 b/config/kernel-timer.m4 index c89ea204e83d..da035dba6183 100644 --- a/config/kernel-timer.m4 +++ b/config/kernel-timer.m4 @@ -1,8 +1,19 @@ dnl # -dnl # 6.2: timer_delete_sync introduced, del_timer_sync deprecated and made -dnl # into a simple wrapper +dnl # 6.2: timer_delete & timer_delete_sync introduced, del_timer & +dnl del_timer_sync deprecated and made into a simple wrapper dnl # 6.15: del_timer_sync removed dnl # +dnl # We test for them separately as they appear to have not always been +dnl # backported together +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE], [ + ZFS_LINUX_TEST_SRC([timer_delete], [ + #include + ],[ + struct timer_list *timer __attribute__((unused)) = NULL; + timer_delete(timer); + ]) +]) AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [ ZFS_LINUX_TEST_SRC([timer_delete_sync], [ #include @@ -12,6 +23,16 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC], [ ]) ]) +AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE], [ + AC_MSG_CHECKING([whether timer_delete() is available]) + ZFS_LINUX_TEST_RESULT([timer_delete], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_TIMER_DELETE, 1, + [timer_delete is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [ AC_MSG_CHECKING([whether timer_delete_sync() is available]) ZFS_LINUX_TEST_RESULT([timer_delete_sync], [ @@ -24,9 +45,11 @@ AC_DEFUN([ZFS_AC_KERNEL_TIMER_DELETE_SYNC], [ ]) AC_DEFUN([ZFS_AC_KERNEL_SRC_TIMER], [ + ZFS_AC_KERNEL_SRC_TIMER_DELETE ZFS_AC_KERNEL_SRC_TIMER_DELETE_SYNC ]) AC_DEFUN([ZFS_AC_KERNEL_TIMER], [ + ZFS_AC_KERNEL_TIMER_DELETE ZFS_AC_KERNEL_TIMER_DELETE_SYNC ]) diff --git a/include/os/linux/spl/sys/taskq.h b/include/os/linux/spl/sys/taskq.h index c9b2bc994c8c..fcb436ac1e3c 
100644 --- a/include/os/linux/spl/sys/taskq.h +++ b/include/os/linux/spl/sys/taskq.h @@ -22,7 +22,7 @@ * with the SPL. If not, see . */ /* - * Copyright (c) 2024, Klara Inc. + * Copyright (c) 2024, 2025, Klara, Inc. * Copyright (c) 2024, Syneto */ @@ -134,6 +134,8 @@ typedef struct taskq { wait_queue_head_t tq_work_waitq; /* new work waitq */ wait_queue_head_t tq_wait_waitq; /* wait waitq */ tq_lock_role_t tq_lock_class; /* class when taking tq_lock */ + struct timer_list tq_deadman; /* deadman timer */ + unsigned long tq_deadman_at; /* time of last deadman trip */ /* list node for the cpu hotplug callback */ struct hlist_node tq_hp_cb_node; boolean_t tq_hp_support; diff --git a/man/man4/spl.4 b/man/man4/spl.4 index 61dfe42e463d..8904e46c0bb6 100644 --- a/man/man4/spl.4 +++ b/man/man4/spl.4 @@ -14,8 +14,9 @@ .\" Portions Copyright [yyyy] [name of copyright owner] .\" .\" Copyright 2013 Turbo Fredriksson . All rights reserved. +.\" Copyright (c) 2025, Klara, Inc. .\" -.Dd May 7, 2025 +.Dd November 12, 2025 .Dt SPL 4 .Os . @@ -130,6 +131,14 @@ When not enabled, the thread is halted to facilitate further debugging. .Pp Set to a non-zero value to enable. . +.It Sy spl_taskq_deadman_timeout Ns = Ns Sy 20 Pq uint +Log a warning if a taskq has not made progress in N seconds. +"Progress" here means a taskq thread has not picked up a new task in this +time, +or all threads have not completed in this time. +This can be useful for deadlock debugging. +Setting this value to 0 will disable this function. +. .It Sy spl_taskq_kick Ns = Ns Sy 0 Pq uint Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will scan all the taskqs. diff --git a/module/os/linux/spl/spl-taskq.c b/module/os/linux/spl/spl-taskq.c index 092f090d934b..625cbb84be87 100644 --- a/module/os/linux/spl/spl-taskq.c +++ b/module/os/linux/spl/spl-taskq.c @@ -24,7 +24,7 @@ * Solaris Porting Layer (SPL) Task Queue Implementation. */ /* - * Copyright (c) 2024, Klara Inc. 
+ * Copyright (c) 2024, 2025, Klara, Inc. * Copyright (c) 2024, Syneto */ @@ -39,7 +39,14 @@ #include #include -/* Linux 6.2 renamed timer_delete_sync(); point it at its old name for those. */ +/* + * Linux 6.2 renamed del_timer()/del_timer_sync() to + * timer_delete()/timer_delete_sync(). For kernels before that, point the new + * names to the old. + */ +#ifndef HAVE_TIMER_DELETE +#define timer_delete(t) del_timer(t) +#endif #ifndef HAVE_TIMER_DELETE_SYNC #define timer_delete_sync(t) del_timer_sync(t) #endif @@ -142,6 +149,11 @@ module_param(spl_taskq_thread_sequential, uint, 0644); MODULE_PARM_DESC(spl_taskq_thread_sequential, "Create new taskq threads after N sequential tasks"); +static uint_t spl_taskq_deadman_timeout = 20; +module_param(spl_taskq_deadman_timeout, uint, 0644); +MODULE_PARM_DESC(spl_taskq_deadman_timeout, + "Log a warning if the taskq has not made progress in N seconds"); + /* * Global system-wide dynamic task queue available for all consumers. This * taskq is not intended for long-running tasks; instead, a dedicated taskq @@ -357,6 +369,34 @@ task_expire(struct timer_list *tl) task_expire_impl(t); } +static void +taskq_deadman(struct timer_list *tl) +{ + unsigned long irqflags; + taskq_t *tq = container_of(tl, taskq_t, tq_deadman); + + spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class); + if (tq->tq_nactive == 0 || spl_taskq_deadman_timeout == 0) { + spin_unlock_irqrestore(&tq->tq_lock, irqflags); + return; + } + + unsigned long nqueued = 0; + struct list_head *pos; + list_for_each(pos, &tq->tq_pend_list) + nqueued++; + list_for_each(pos, &tq->tq_prio_list) + nqueued++; + + printk(KERN_INFO "spl: taskq stuck for %us: %s.%d " + "[%d/%d threads active, %lu tasks queued]\n", + spl_taskq_deadman_timeout, tq->tq_name, tq->tq_instance, + tq->tq_nthreads, tq->tq_nactive, nqueued); + + tq->tq_deadman_at = jiffies; + spin_unlock_irqrestore(&tq->tq_lock, irqflags); +} + /* * Returns the lowest incomplete taskqid_t. 
The taskqid_t may * be queued on the pending list, on the priority list, on the @@ -1071,6 +1111,11 @@ taskq_thread(void *args) taskq_insert_in_order(tq, tqt); tq->tq_nactive++; + + if (spl_taskq_deadman_timeout > 0) + mod_timer(&tq->tq_deadman, + jiffies + spl_taskq_deadman_timeout * HZ); + spin_unlock_irqrestore(&tq->tq_lock, flags); TQSTAT_INC(tq, threads_active); @@ -1096,6 +1141,21 @@ taskq_thread(void *args) list_del_init(&tqt->tqt_active_list); tqt->tqt_task = NULL; + if (tq->tq_nactive == 0 || + spl_taskq_deadman_timeout == 0) + timer_delete(&tq->tq_deadman); + + if (tq->tq_deadman_at > 0) { + unsigned long stuck_for = + jiffies - tq->tq_deadman_at; + tq->tq_deadman_at = 0; + + printk(KERN_INFO + "spl: taskq resumed after %lus: %s.%d\n", + stuck_for / HZ, tq->tq_name, + tq->tq_instance); + } + /* For prealloc'd tasks, we don't free anything. */ if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC)) task_done(tq, t); @@ -1375,6 +1435,9 @@ taskq_create(const char *name, int threads_arg, pri_t pri, tq->tq_next_id = TASKQID_INITIAL; tq->tq_lowest_id = TASKQID_INITIAL; tq->lastspawnstop = jiffies; + timer_setup(&tq->tq_deadman, NULL, 0); + tq->tq_deadman.function = taskq_deadman; + tq->tq_deadman_at = 0; INIT_LIST_HEAD(&tq->tq_free_list); INIT_LIST_HEAD(&tq->tq_pend_list); INIT_LIST_HEAD(&tq->tq_prio_list);