关键词：watchdog、soft lockup、percpu thread、lockdep等。

近日遇到一个soft lockup问题，打印类似“[ 56.032356] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [cat:153]”。

这是lockup检测机制在起作用，lockup检测机制包括soft lockup detector和hard lockup detector。

借机分析下soft lockup机制以及什么情况下导致soft watchdog异常、对watchdog的配置、如何定位异常点。

这里跳过hard lockup detector的分析。

1. soft lockup机制分析

lockup_detector_init()函数首先获取sample_period以及watchdog_cpumask，然后根据情况创建线程，启动喂狗程序；创建hrtimer启动看门狗。

这里有两个重点：一个是创建内核线程的API，另一个是struct smp_hotplug_thread结构体。

void __init lockup_detector_init(void)
{set_sample_period();----------------------------------------获取变量sample_period，为watchdog_thresh*2/5，即4秒喂一次狗。
...cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
if (watchdog_enabled)watchdog_enable_all_cpus();
}static int watchdog_enable_all_cpus(void)
{int err = 0;if (!watchdog_running) {----------------------------------如果当前watchdog没有在运行（watchdog_running为0），那么为每个CPU创建一个watchdog/x线程，这些线程每隔sample_period时间喂一次狗。watchdog_threads是watchdog/x线程的主要输入参数，watchdog_cpumask规定了在哪些CPU上启用（unpark）线程。err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,&watchdog_cpumask);if (err)pr_err("Failed to create watchdog threads, disabled\n");elsewatchdog_running = 1;} else {
        err = update_watchdog_all_cpus();if (err) {watchdog_disable_all_cpus();pr_err("Failed to update lockup detectors, disabled\n");}}if (err)watchdog_enabled = 0;return err;
}static void watchdog_disable_all_cpus(void)
{if (watchdog_running) {watchdog_running = 0;smpboot_unregister_percpu_thread(&watchdog_threads);}
}static int update_watchdog_all_cpus(void)
{int ret;ret = watchdog_park_threads();if (ret)return ret;watchdog_unpark_threads();return 0;
}static int watchdog_park_threads(void)
{int cpu, ret = 0;atomic_set(&watchdog_park_in_progress, 1);for_each_watchdog_cpu(cpu) {ret = kthread_park(per_cpu(softlockup_watchdog, cpu));---------------------------设置struct kthread->flags的KTHREAD_SHOULD_PARK位，在watchdog/x线程中会调用park成员函数进行处理。if (ret)break;}atomic_set(&watchdog_park_in_progress, 0);return ret;
}static void watchdog_unpark_threads(void)
{int cpu;for_each_watchdog_cpu(cpu)kthread_unpark(per_cpu(softlockup_watchdog, cpu));-------------------------------清空struct kthread->flags的KTHREAD_SHOULD_PARK位，在watchdog/x线程中会调用unpark成员函数。
}

1.1 watchdog_threads结构体介绍

在介绍如何创建watchdog/x线程之前，有必要先介绍一下struct smp_hotplug_thread结构体。

struct smp_hotplug_thread {struct task_struct __percpu    **store;--------------------------存放percpu struct task_struct指针的指针。struct list_head        list;int                (*thread_should_run)(unsigned int cpu);-------检查是否应该运行watchdog/x线程。void                (*thread_fn)(unsigned int cpu);--------------watchdog/x线程的主函数。void                (*create)(unsigned int cpu);void                (*setup)(unsigned int cpu);------------------在运行watchdog/x线程之前的准备工作。void                (*cleanup)(unsigned int cpu, bool online);---在退出watchdog/x线程之后的清理工作。void                (*park)(unsigned int cpu);-------------------当CPU offline时，需要临时停止。void                (*unpark)(unsigned int cpu);-----------------当CPU变成online时，进行准备工作。cpumask_var_t            cpumask;--------------------------------线程允许在哪些CPU上运行（unpark）。bool                selfparking;const char            *thread_comm;------------------------------watchdog/x线程名称。
};

watchdog_threads是soft lockup监控线程的实体，基于此创建 watchdog/x线程。

static struct smp_hotplug_thread watchdog_threads = {.store            = &softlockup_watchdog,.thread_should_run    = watchdog_should_run,.thread_fn        = watchdog,.thread_comm        = "watchdog/%u",.setup            = watchdog_enable,.cleanup        = watchdog_cleanup,.park            = watchdog_disable,.unpark            = watchdog_enable,
};static void watchdog_enable(unsigned int cpu)
{struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);/* kick off the timer for the hardlockup detector */hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);hrtimer->function = watchdog_timer_fn;------------------------------------------创建一个hrtimer，超时函数为watchdog_timer_fn，这里面会检查watchdog_touch_ts变量是否超过20秒没有被更新。如果是，则有soft lockup。/* Enable the perf event */watchdog_nmi_enable(cpu);/* done here because hrtimer_start can only pin to smp_processor_id() */hrtimer_start(hrtimer, ns_to_ktime(sample_period),HRTIMER_MODE_REL_PINNED);---------------------------------------------启动一个超时为sample_period(4秒)的hrtimer，HRTIMER_MODE_REL_PINNED表示此hrtimer和当前CPU绑定。/* initialize timestamp */watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);---------------------------------设置当前线程为实时FIFO调度，优先级为99（MAX_RT_PRIO - 1）。这是最高的实时优先级，高于所有非实时线程以及其他优先级更低的实时线程。__touch_watchdog();-------------------------------------------------------------更新watchdog_touch_ts变量，相当于喂狗操作。
}static void watchdog_set_prio(unsigned int policy, unsigned int prio)
{struct sched_param param = { .sched_priority = prio };sched_setscheduler(current, policy, &param);
}/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{__this_cpu_write(watchdog_touch_ts, get_timestamp());----------------------------喂狗的操作就是更新watchdog_touch_ts变量，也即当前时间戳。
}static void watchdog_disable(unsigned int cpu)-------------------------------------相当于watchdog_enable()反操作，将线程恢复为普通线程；取消hrtimer。
{struct hrtimer *hrtimer = raw_cpu_ptr(&watchdog_hrtimer);watchdog_set_prio(SCHED_NORMAL, 0);hrtimer_cancel(hrtimer);/* disable the perf event */watchdog_nmi_disable(cpu);
}static void watchdog_cleanup(unsigned int cpu, bool online)
{watchdog_disable(cpu);
}

static int watchdog_should_run(unsigned int cpu)
{return __this_cpu_read(hrtimer_interrupts) !=__this_cpu_read(soft_lockup_hrtimer_cnt);------------------------------------hrtimer_interrupts记录了产生hrtimer的次数；在watchdog()中，将hrtimer_interrupts赋给soft_lockup_hrtimer_cnt。两者相等表示没有hrtimer产生，不需要运行watchdog/x线程；相反不等，则需要watchdog/x线程运行。
}
static void watchdog(unsigned int cpu)
{__this_cpu_write(soft_lockup_hrtimer_cnt,__this_cpu_read(hrtimer_interrupts));-----------------------------------更新soft_lockup_hrtimer_cnt，在watchdog_should_run()中就返回false，表示线程不需要运行，即不需要喂狗。__touch_watchdog();--------------------------------------------------------------虽然就是一句话，但是却很重要的喂狗操作。    if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))watchdog_nmi_disable(cpu);
}

1.2 创建喂狗线程watchdog/x

在分析了watchdog_threads之后，再来看看如何创建watchdog/x线程。

int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,const struct cpumask *cpumask)
{unsigned int cpu;int ret = 0;if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))return -ENOMEM;cpumask_copy(plug_thread->cpumask, cpumask);get_online_cpus();mutex_lock(&smpboot_threads_lock);for_each_online_cpu(cpu) {------------------------------------------------遍历所有online CPU，为每个CPU创建一个percpu的watchdog/x线程。ret = __smpboot_create_thread(plug_thread, cpu);if (ret) {smpboot_destroy_threads(plug_thread);-----------------------------创建失败则释放相关资源。free_cpumask_var(plug_thread->cpumask);goto out;}if (cpumask_test_cpu(cpu, cpumask))smpboot_unpark_thread(plug_thread, cpu);--------------------------如果当前CPU在cpumask中，则清空KTHREAD_SHOULD_PARK，让线程开始运行（首次运行时调用watchdog_threads的setup成员函数，从parked状态恢复时调用unpark成员函数）。}list_add(&plug_thread->list, &hotplug_threads);
out:mutex_unlock(&smpboot_threads_lock);put_online_cpus();return ret;
}static int
__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);struct smpboot_thread_data *td;if (tsk)return 0;td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));if (!td)return -ENOMEM;td->cpu = cpu;td->ht = ht;tsk =kthread_create_on_cpu(smpboot_thread_fn, td, cpu,ht->thread_comm);-----------------------------------------在指定CPU上创建watchdog/x线程，处理函数为smpboot_thread_fn()。if (IS_ERR(tsk)) {kfree(td);return PTR_ERR(tsk);}/** Park the thread so that it could start right on the CPU* when it is available.*/kthread_park(tsk);--------------------------------------------------------先让线程进入parked状态，这样当CPU可用时线程可以直接在该CPU上开始运行。get_task_struct(tsk);-----------------------------------------------------增加对线程的引用计数。*per_cpu_ptr(ht->store, cpu) = tsk;---------------------------------------将线程的task_struct指针保存到store指向的percpu变量中。if (ht->create) {
        if (!wait_task_inactive(tsk, TASK_PARKED))WARN_ON(1);elseht->create(cpu);}return 0;
}

static int smpboot_thread_fn(void *data)
{struct smpboot_thread_data *td = data;struct smp_hotplug_thread *ht = td->ht;while (1) {set_current_state(TASK_INTERRUPTIBLE);preempt_disable();if (kthread_should_stop()) {----------------------------------------如果可以终止线程，调用cleanup，退出线程。__set_current_state(TASK_RUNNING);preempt_enable();/* cleanup must mirror setup */if (ht->cleanup && td->status != HP_THREAD_NONE)ht->cleanup(td->cpu, cpu_online(td->cpu));kfree(td);return 0;}if (kthread_should_park()) {----------------------------------------如果KTHREAD_SHOULD_PARK置位，调用park()暂停线程执行。__set_current_state(TASK_RUNNING);preempt_enable();if (ht->park && td->status == HP_THREAD_ACTIVE) {BUG_ON(td->cpu != smp_processor_id());ht->park(td->cpu);td->status = HP_THREAD_PARKED;}kthread_parkme();/* We might have been woken for stop */continue;}BUG_ON(td->cpu != smp_processor_id());/* Check for state change setup */switch (td->status) {case HP_THREAD_NONE:-----------------------------------------------相当于第一次运行，调用setup()进行初始化操作。__set_current_state(TASK_RUNNING);preempt_enable();if (ht->setup)ht->setup(td->cpu);td->status = HP_THREAD_ACTIVE;continue;case HP_THREAD_PARKED:---------------------------------------------从parked状态恢复。__set_current_state(TASK_RUNNING);preempt_enable();if (ht->unpark)ht->unpark(td->cpu);td->status = HP_THREAD_ACTIVE;continue;}if (!ht->thread_should_run(td->cpu)) {-----------------------------如果不需要线程运行，schedule()主动放弃CPU给其他线程使用。preempt_enable_no_resched();schedule();} else {__set_current_state(TASK_RUNNING);preempt_enable();ht->thread_fn(td->cpu);----------------------------------------调用struct smp_hotplug_thread->thread_fn，即watchdog()，进行喂狗操作。}}
}void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)----将创建的内核线程移除操作。
{get_online_cpus();mutex_lock(&smpboot_threads_lock);list_del(&plug_thread->list);smpboot_destroy_threads(plug_thread);mutex_unlock(&smpboot_threads_lock);put_online_cpus();free_cpumask_var(plug_thread->cpumask);
}static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
{unsigned int cpu;/* We need to destroy also the parked threads of offline cpus */for_each_possible_cpu(cpu) {struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);if (tsk) {kthread_stop(tsk);put_task_struct(tsk);*per_cpu_ptr(ht->store, cpu) = NULL;}}
}

1.3 hrtimer看门狗

在分析了喂狗线程watchdog/x之后，再来分析看门狗是如何实现的？

看门狗是通过启动一个周期为4秒的hrtimer来实现的，这个hrtimer和CPU绑定，使用的变量都是percpu的。确保每个CPU之间不相互干扰。

每次hrtimer超时，都会唤醒watchdog/x线程，并进行一次喂狗操作。

因为hrtimer超时函数在（硬）中断上下文中调用，在中断产生后会比线程优先得到执行。

所以在watchdog/x线程没有得到执行的情况下，通过is_softlockup()来判断看门狗是否超过20秒没有得到喂狗。

static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);struct pt_regs *regs = get_irq_regs();int duration;int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;if (atomic_read(&watchdog_park_in_progress) != 0)return HRTIMER_NORESTART;/* kick the hardlockup detector */watchdog_interrupt_count();------------------------------------------------------------------每产生一次hrtimer中断，hrtimer_interrupts计数加1。hrtimer_interrupts记录了hrtimer超时的次数。/* kick the softlockup detector */wake_up_process(__this_cpu_read(softlockup_watchdog));---------------------------------------唤醒watchdog/x线程，进行喂狗操作。/* .. and repeat */hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));------------------------------------重新设置超时点，形成周期性时钟。
...
    duration = is_softlockup(touch_ts);----------------------------------------------------------返回非0表示，看门狗超时。if (unlikely(duration)) {--------------------------------------------------------------------看门狗超时情况的处理。
        if (kvm_check_and_clear_guest_paused())return HRTIMER_RESTART;/* only warn once */if (__this_cpu_read(soft_watchdog_warn) == true) {
            if (__this_cpu_read(softlockup_task_ptr_saved) !=current) {__this_cpu_write(soft_watchdog_warn, false);__touch_watchdog();}return HRTIMER_RESTART;}if (softlockup_all_cpu_backtrace) {
            if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {/* Someone else will report us. Let's give up */__this_cpu_write(soft_watchdog_warn, true);return HRTIMER_RESTART;}}pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",smp_processor_id(), duration,current->comm, task_pid_nr(current));-------------------------------------------------打印哪个CPU被卡死duration秒，以及死在哪个进程。__this_cpu_write(softlockup_task_ptr_saved, current);print_modules();print_irqtrace_events(current);-----------------------------------------------------------显示开关中断、软中断信息，禁止中断和软中断也是造成soft lockup的一个原因。if (regs)---------------------------------------------------------------------------------有寄存器显示寄存器信息，同时显示栈信息。show_regs(regs);elsedump_stack();if (softlockup_all_cpu_backtrace) {
            trigger_allbutself_cpu_backtrace();clear_bit(0, &soft_lockup_nmi_warn);/* Barrier to sync with other cpus */smp_mb__after_atomic();}add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);if (softlockup_panic)---------------------------------------------------------------------如果定义softlockup_panic则进入panic()。panic("softlockup: hung tasks");__this_cpu_write(soft_watchdog_warn, true);} else__this_cpu_write(soft_watchdog_warn, false);return HRTIMER_RESTART;
}

static void watchdog_interrupt_count(void)
{
__this_cpu_inc(hrtimer_interrupts);
}

static int is_softlockup(unsigned long touch_ts)
{unsigned long now = get_timestamp();if ((watchdog_enabled & SOFT_WATCHDOG_ENABLED) && watchdog_thresh){/* Warn about unreasonable delays. */if (time_after(now, touch_ts + get_softlockup_thresh()))return now - touch_ts;}return 0;
}

2. 对watchdog的设置

对watchdog行为的设置有两个途径：通过命令行传入参数和通过proc设置。

2.1 通过命令行设置

通过命令行传入参数，可以对soft lockup进行开关设置、超时过后是否panic等等行为。

static int __init softlockup_panic_setup(char *str)
{softlockup_panic = simple_strtoul(str, NULL, 0);return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);static int __init nowatchdog_setup(char *str)
{watchdog_enabled = 0;return 1;
}
__setup("nowatchdog", nowatchdog_setup);static int __init nosoftlockup_setup(char *str)
{watchdog_enabled &= ~SOFT_WATCHDOG_ENABLED;return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);#ifdef CONFIG_SMP
static int __init softlockup_all_cpu_backtrace_setup(char *str)
{sysctl_softlockup_all_cpu_backtrace =!!simple_strtol(str, NULL, 0);return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{sysctl_hardlockup_all_cpu_backtrace =!!simple_strtol(str, NULL, 0);return 1;
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif

2.2 通过proc节点调节watchdog

watchdog相关的配置还可以通过proc文件系统进行配置。

/proc/sys/kernel/nmi_watchdog-------------------------hard lockup开关，proc_nmi_watchdog()。
/proc/sys/kernel/soft_watchdog------------------------soft lockup开关，proc_soft_watchdog()。
/proc/sys/kernel/watchdog-----------------------------watchdog总开关，proc_watchdog()。
/proc/sys/kernel/watchdog_cpumask---------------------watchdog cpumask，proc_watchdog_cpumask()。
/proc/sys/kernel/watchdog_thresh----------------------watchdog超时阈值设置，proc_watchdog_thresh()。

3. 定位soft lockup异常

引起soft lockup的原因一般是死循环或者死锁，死循环可以通过栈回溯找到问题点；死锁问题需要打开内核的lockdep功能。

打开内核的lockdep功能可以参考《Linux死锁检测-Lockdep》。

下面看一个while(1)引起的soft lockup异常分析：

[ 5656.032325] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [cat:157]-----------------------CPU、进程等信息粗略定位。
[ 5656.039314] Modules linked in:
[ 5656.042386]
[ 5656.042386] CURRENT PROCESS:
[ 5656.042386]
[ 5656.048229] COMM=cat PID=157
[ 5656.051117] TEXT=00008000-000c5a68 DATA=000c6f1c-000c7175 BSS=000c7175-000c8000
[ 5656.058432] USER-STACK=7fc1ee50  KERNEL-STACK=bd0b7080
[ 5656.058432]
[ 5656.065069] PC: 0x8032a1b2 (clk_summary_show+0x62/0xb4)--------------------------------------------PC指向出问题的点，更加精确的定位。
[ 5656.070302] LR: 0x8032a186 (clk_summary_show+0x36/0xb4)
[ 5656.075531] SP: 0xbd8b1b74...
[ 5656.217622]
Call Trace:-----------------------------------------------------------------------------------------通过Call Trace，可以了解是如何走到PC指向的问题点的。来龙去脉一目了然。
[<80155c5e>] seq_read+0xc2/0x46c
[<802826ac>] full_proxy_read+0x58/0x98
[<8013239c>] do_readv_writev+0x31c/0x384
[<80132458>] vfs_readv+0x54/0x8c
[<80160b52>] default_file_splice_read+0x166/0x2b0
[<801606ee>] do_splice_to+0x76/0xb0
[<801607de>] splice_direct_to_actor+0xb6/0x21c
[<801609c2>] do_splice_direct+0x7e/0xa8
[<80132a5a>] do_sendfile+0x21a/0x45c
[<80133776>] SyS_sendfile64+0xf6/0xfc
[<80046186>] csky_systemcall+0x96/0xe0

转载于:https://www.cnblogs.com/arnoldlu/p/10338850.html

Linux soft lockup分析相关推荐

安装linux系统报softlock,soft lockup 解决思路
一. 前言前几天,帮同事一起查一个机器老是挂死无法进入问题,说有一台虚拟机时不时登陆不上挂死,同时甲方竟然没有这些主机监控,判断不了当时的cpu,内存,网络等的基础数据信息,那就只能看看内核信息了. ...
Linux 在 soft lockup 时，可以远程调试吗？
[CSDN 编者按]事件陷入死地无可挽救之际,可能会有人选择不了了之,有人选择就此放弃--但换个思路想一想,既然都无可挽回了,那干嘛不试试弄点有价值的信息回来? 作者 | dog250 责编 | 张 ...
linux服务器关不了机,解决Linux关不了机开机,报错NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s的bug...
问题描述在安装完Ubuntu或者其他Linux, 关机时会卡死, 循环报错NMI watchdog: BUG: soft lockup - CPU#2 stuck for 22s!, 无法关机. 在 ...
Linux 内核软死锁（soft lockup）记录
问题背景在特定服务器资源中运行高负载程序造成NMI watchdog: BUG: soft lockup - CPU#4 stuck for 24s 名词解释 Soft lockup名称解释:所谓 ...
Linux系列之soft lockup机制浅析
Linux系列之soft lockup机制浅析 1.背景 2.什么是lockup? 2.1 lockup检测机制 2.2 softlockup的工作原理 3.soft lockup机制分析 3.1 ...
smp_call_function_many死锁问题分析2 -- soft lockup hard lockup
smp_call_function_many死锁问题分析2 -- soft lockup & hard lockup 上篇回顾 1.找出没有响应IPI的CPU 2.问题现象分析 2.1 har ...
安装linux系统报softlock,客户系统报错：soft lockup的解决办法
redhat os日志如下:服务器会重启,负载重的时候会这样 Sep 28 02:14:00 hpdb1 kernel: BUG: soft lockup - CPU#40 stuck for 23s ...
遇到“BUG: soft lockup - CPU#0 stuck for 22s”的解决思路
之前开发的抓包模块上线后有客户反馈有丢包问题,这两天在定位这个丢包问题,抓包模块由我和另一名"队友"负责,我负责底层抓包开发,他负责接收处理.在测试丢包问题的时候,他遇到一个板子连 ...
NMI watchdog: BUG: soft lockup - CPU#2 stuck for 23s!
<NMI watchdog: BUG: soft lockup> <kernel:NMI watchdog: BUG: soft lockup - CPU#6 stuck for 2 ...

Linux soft lockup分析