preempt_notifier本质上是一种Linux kernel notifier chain,监听的事件有两类:线程被调度出去(sched out,例如时间片用完或者被抢占),以及线程被重新调度回来(sched in)。

本文参考内核版本为v5.0

1. 线程sched out

schedule
└── __schedule
    └── context_switch
        └── prepare_task_switch
            └── fire_sched_out_preempt_notifiers
                └── __fire_sched_out_preempt_notifiers

`__fire_sched_out_preempt_notifiers`的实现如下:

static void
__fire_sched_out_preempt_notifiers(struct task_struct *curr,
struct task_struct *next)
{
struct preempt_notifier *notifier;
/*调用curr注册的notifier,通知当前线程被sched out */
hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
notifier->ops->sched_out(notifier, next);
}

2. 线程sched in

schedule
└── __schedule
    └── context_switch
        └── finish_task_switch
            └── fire_sched_in_preempt_notifiers
                └── __fire_sched_in_preempt_notifiers

`__fire_sched_in_preempt_notifiers`的实现如下:

static void __fire_sched_in_preempt_notifiers(struct task_struct *curr)
{
struct preempt_notifier *notifier;
/*通知线程sched in */
hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
notifier->ops->sched_in(notifier, raw_smp_processor_id());
}

3. notifier注册

Linux内核提供如下API,用于为当前线程注册和注销调度notifier:

/* Register @notifier on the current task's preempt notifier list. */
void preempt_notifier_register(struct preempt_notifier *notifier);

/*
 * Counterpart of preempt_notifier_register(); presumably removes @notifier
 * again (implementation not shown in this article).
 */
void preempt_notifier_unregister(struct preempt_notifier *notifier);

其中`preempt_notifier_register`的实现如下:

/**
 * preempt_notifier_register - tell me when current is being preempted & rescheduled
 * @notifier: notifier struct to register
 *
 * Emits a warning when static_key_false(&preempt_notifier_key) is false, and
 * in either case links @notifier at the head of current's preempt_notifiers
 * list.
 */
void preempt_notifier_register(struct preempt_notifier *notifier)
{
	struct hlist_head *head = &current->preempt_notifiers;

	if (!static_key_false(&preempt_notifier_key))
		WARN(1, "registering preempt_notifier while notifiers disabled\n");

	hlist_add_head(&notifier->link, head);
}

4. KVM Example

原始patch

KVM中与preempt notifier相关的代码如下(节选,`...`表示省略的部分):

/*
 * Callback table used by KVM's preempt notifiers; its sched_in and sched_out
 * members are assigned in kvm_init().
 */
static __read_mostly struct preempt_ops kvm_preempt_ops;

/*
 * Excerpt of vCPU creation (elided parts are shown as "..."): only the
 * preempt notifier initialisation is kept.
 */
static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
{
...
/* Initialise the vCPU's preempt_notifier member with kvm_preempt_ops. */
preempt_notifier_init(&vcpu->preempt_notifier, &kvm_preempt_ops);
...
}

/*
 * Excerpt of KVM initialisation (elided parts are shown as "..."): only the
 * setup of the kvm_preempt_ops callbacks is kept.
 */
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
...
/* Install KVM's handlers for the sched-in and sched-out events. */
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
...
}

/*
 * sched_in handler installed in kvm_preempt_ops by kvm_init().
 *
 * Looks up the vCPU that owns @pn, clears its preempted flag if it was set,
 * then calls kvm_arch_sched_in() followed by kvm_arch_vcpu_load() with @cpu.
 *
 * @pn:  preempt notifier that fired
 * @cpu: cpu value supplied by the caller of the callback
 */
static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct kvm_vcpu *vcpu;

	vcpu = preempt_notifier_to_vcpu(pn);

	/* Only write the flag when it is actually set. */
	if (vcpu->preempted)
		vcpu->preempted = false;

	kvm_arch_sched_in(vcpu, cpu);
	kvm_arch_vcpu_load(vcpu, cpu);
}

/*
 * sched_out handler installed in kvm_preempt_ops by kvm_init().
 *
 * Looks up the vCPU that owns @pn, marks it preempted when the current task
 * is still in TASK_RUNNING state, then calls kvm_arch_vcpu_put().
 *
 * @pn:   preempt notifier that fired
 * @next: unused by this handler
 */
static void kvm_sched_out(struct preempt_notifier *pn,
			  struct task_struct *next)
{
	struct kvm_vcpu *vcpu;

	vcpu = preempt_notifier_to_vcpu(pn);

	/*
	 * NOTE(review): presumably a task that is still TASK_RUNNING was
	 * preempted rather than having blocked voluntarily; confirm.
	 */
	if (current->state == TASK_RUNNING)
		vcpu->preempted = true;

	kvm_arch_vcpu_put(vcpu);
}

vCPU的notifier在`vcpu_load`中通过`preempt_notifier_register`注册:

/*
 * Excerpt of vcpu_load (elided parts are shown as "..."): only the preempt
 * notifier registration is kept.
 */
int vcpu_load(struct kvm_vcpu *vcpu)
{
...
/* Register this vCPU's notifier via preempt_notifier_register(). */
preempt_notifier_register(&vcpu->preempt_notifier);
...
}

参考资料:

  1. Linux内核线程实时获取调度状态的方法
  2. Linux 内核 schedule时的preemption notify机制
  3. Linux kernel preempt_ops
  4. SCHED: Generic hooks for trapping task preemption