INFO: task filebench:7143 blocked for more than 120 seconds. 21794 Oct 24 13:21:33 localhost kernel: "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
/* * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for * a really long time (120 seconds). If that happens, print out * a warning. */ staticvoidcheck_hung_uninterruptible_tasks(unsignedlong timeout) { /*hung task检测是检查的最大进程数,默认为最大的进程号*/ int max_count = sysctl_hung_task_check_count; /* * 每次遍历进程数的上限,默认为1024,这样做的目的是为了: * 1、防止rcu_read_lock的占用时间太长。 * 2、hung task的watchdog占用CPU时间太长。如果没开内核抢占,则如果内核线程不主动调度的话,是不能发生进程切换的 */ /* * 如果系统中的进程数比较多,那么就可能检测不到部分D状态进程了?不会,因为这里只是会调度一次,调度回来后,会继续遍历后面的进程 */ int batch_count = HUNG_TASK_BATCHING; structtask_struct *g, *t;
/* * If the system crashed already then all bets are off, * do not report extra hung tasks: */ /*如果系统已经处于crash状态了,就不再报hung task了。*/ if (test_taint(TAINT_DIE) || did_panic) return;
rcu_read_lock(); /*遍历系统中的所有进程*/ do_each_thread(g, t) { if (!max_count--) goto unlock; /*如果每次检测的进程数量超过1024了,则需要发起调度,结束rcu优雅周期*/ if (!--batch_count) { batch_count = HUNG_TASK_BATCHING; /*释放rcu,并主动调度,调度回来后检查相应进程是否还在,如果不在了,则退出遍历,否则继续*/ if (!rcu_lock_break(g, t)) goto unlock; } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ /*检测进程状态是否为D*/ if (t->state == TASK_UNINTERRUPTIBLE) /*检测进程处于D状态的时间是否超过120s。*/ check_hung_task(t, timeout); } while_each_thread(g, t); unlock: rcu_read_unlock(); }
/* * Ensure the task is not frozen. * Also, skip vfork and any other user process that freezer should skip. */ if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP))) return;
/* * When a freshly created task is scheduled once, changes its state to * TASK_UNINTERRUPTIBLE without having ever been switched out once, it * musn't be checked. */ if (unlikely(!switch_count)) return;
if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--;
/* * Ok, the task did not get scheduled for more than 2 minutes, * complain: */ /*如下就是我们平常常见的hung task打印了*/ pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", t->comm, t->pid, timeout); pr_err(" %s %s %.*s\n", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); /*打印堆栈*/ sched_show_task(t); /*如果开启了debug_lock,则打印锁的占用情况*/ debug_show_held_locks(t);