本文记录(mark)一下eventfd_signal的实现。

Overview

eventfd_signal
└── eventfd_signal_mask
    └── wake_up_locked_poll[__wake_up_locked_key]
        └── __wake_up_common
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * Excerpt of the wait-queue wakeup loop; each "..." line marks code that
 * this article leaves out.
 *
 * The part shown iterates over the entries linked on wq_head->head and
 * invokes every non-bookmark entry's func callback. The initialisation of
 * curr, the handling of ret, and the uses of nr_exclusive, bookmark and cnt
 * are all in the omitted parts.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, int wake_flags, void *key,
wait_queue_entry_t *bookmark)
{
wait_queue_entry_t *curr, *next;
int cnt = 0;
...

/* The "_from" iterator continues from whatever curr was set to above (elided). */
list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
unsigned flags = curr->flags;
int ret;

/* Entries flagged WQ_FLAG_BOOKMARK are skipped; their callback is never invoked. */
if (flags & WQ_FLAG_BOOKMARK)
continue;

/* Hand the wakeup to the entry's own callback, forwarding mode, wake_flags and key. */
ret = curr->func(curr, mode, wake_flags, key);
...
}

...
}

由__wake_up_common的实现可知,最终eventfd_signal调用了wait_queue_entry的func回调。

1
2
3
4
5
6
7
8
9
/*
* A single wait-queue entry structure: one waiter linked on a
* wait_queue_head, together with the callback that is run to wake it.
*/
struct wait_queue_entry {
/* Flag bits; __wake_up_common tests WQ_FLAG_BOOKMARK here to skip an entry. */
unsigned int flags;
/* Opaque per-waiter data owned by whoever registered the entry; the callback interprets it. */
void *private;
/* Wakeup callback, invoked by __wake_up_common as func(entry, mode, wake_flags, key). */
wait_queue_func_t func;
/* List linkage: the member name used when iterating a wait_queue_head's head list. */
struct list_head entry;
};

vhost_poll_wakeup

《源码解析:vhost ioeventfd与irqfd》一文中提到过vhost_poll_wakeup,那么这个函数又是如何与eventfd_signal关联起来的呢?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 * Excerpt of vhost_poll_init; each "..." line marks code that this article
 * leaves out, so the uses of fn, mask, dev and vq are not visible here.
 *
 * The line shown installs vhost_poll_wakeup as the func callback of the
 * wait-queue entry embedded in the vhost_poll (poll->wait).
 */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
__poll_t mask, struct vhost_dev *dev,
struct vhost_virtqueue *vq)
{
...
/* From here on, a wakeup delivered to poll->wait runs vhost_poll_wakeup. */
init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
...
}

/*
 * Prepare a wait-queue entry whose wakeup is handled by a caller-supplied
 * callback: installs @func and resets both the private pointer and the
 * flags word. The list linkage (wq_entry->entry) is not touched here.
 */
static inline void
init_waitqueue_func_entry(struct wait_queue_entry *wq_entry, wait_queue_func_t func)
{
	wq_entry->func = func;
	wq_entry->private = NULL;
	wq_entry->flags = 0;
}

由上述代码片段可知,vhost_poll_wakeup被设置为了wait_queue_entry的func回调。

由此可知,eventfd_signal最终调用了vhost_poll_wakeup函数;因此,vhost_poll_wakeup函数的运行上下文是vCPU线程(kvm调用了eventfd_signal,而kvm的运行上下文是vCPU线程)。

ioeventfd_write
└── eventfd_signal
    └── eventfd_signal_mask
        └── wake_up_locked_poll[__wake_up_locked_key]
            └── __wake_up_common
                └── vhost_poll_wakeup

select/poll/epoll的wait_queue_entry func回调

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/*
 * Queueing callback used by select() and poll().
 *
 * Obtains a poll_table_entry from the poll_wqueues that embeds @p, fills it
 * in for @filp / @wait_address, and hooks it onto @wait_address with
 * pollwake as its wakeup callback. If no entry can be obtained the function
 * returns without registering anything.
 */
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
		       poll_table *p)
{
	struct poll_wqueues *table = container_of(p, struct poll_wqueues, pt);
	struct poll_table_entry *slot = poll_get_entry(table);

	if (slot == NULL)
		return;

	/* Remember which events, which queue and which file this entry is for. */
	slot->key = p->_key;
	slot->wait_address = wait_address;
	slot->filp = get_file(filp);

	/*
	 * init_waitqueue_func_entry() clears ->private, so the back-pointer
	 * to the owning poll_wqueues has to be set afterwards.
	 */
	init_waitqueue_func_entry(&slot->wait, pollwake);
	slot->wait.private = table;

	add_wait_queue(wait_address, &slot->wait);
}

对于select和poll,wait_queue_entry的func回调是pollwake。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Queueing callback used by epoll.
 *
 * Allocates an eppoll_entry for the epitem carried in the ep_pqueue that
 * embeds @pt, registers it on @whead with ep_poll_callback as its wakeup
 * callback, and pushes it onto the front of the epitem's pwqlist.
 * On allocation failure the ep_pqueue's epi pointer is cleared, which makes
 * any later invocation with the same ep_pqueue return immediately.
 */
static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
				 poll_table *pt)
{
	struct ep_pqueue *queue = container_of(pt, struct ep_pqueue, pt);
	struct epitem *item = queue->epi;
	struct eppoll_entry *node;

	/* An earlier allocation has already failed: nothing more to do. */
	if (unlikely(item == NULL))
		return;

	node = kmem_cache_alloc(pwq_cache, GFP_KERNEL);
	if (unlikely(node == NULL)) {
		queue->epi = NULL;
		return;
	}

	init_waitqueue_func_entry(&node->wait, ep_poll_callback);
	node->base = item;
	node->whead = whead;

	/* EPOLLEXCLUSIVE items are queued as exclusive waiters. */
	if (!(item->event.events & EPOLLEXCLUSIVE))
		add_wait_queue(whead, &node->wait);
	else
		add_wait_queue_exclusive(whead, &node->wait);

	/* Link the new entry at the head of the item's list of poll wait entries. */
	node->next = item->pwqlist;
	item->pwqlist = node;
}

对于epoll,wait_queue_entry的func回调是ep_poll_callback。

为了方便起见,本文只详细介绍一下pollwake。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/*
 * Wait-queue callback (func) for select()/poll() entries.
 *
 * Invoked when the file becomes ready; @key carries the status mask
 * supplied by the file. Returns 0 without waking anyone when a non-NULL
 * @key has no bit in common with the events this entry asked for;
 * otherwise returns whatever __pollwake() returns.
 */
static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	/* Recover the poll_table_entry that embeds this wait-queue entry. */
	struct poll_table_entry *entry =
		container_of(wait, struct poll_table_entry, wait);

	/* A NULL key carries no mask, so there is nothing to filter on. */
	if (key == NULL || ((unsigned long)key & entry->key) != 0)
		return __pollwake(wait, mode, sync, key);

	/* None of the reported events is one this entry is interested in. */
	return 0;
}
/*
 * Wake the task that is polling on behalf of the poll_wqueues stored in
 * wait->private, and record that a wakeup happened.
 *
 * Returns the result of default_wake_function().
 */
static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
struct poll_wqueues *pwq = wait->private;
// Build a temporary on-stack wait entry (dummy_wait) bound to the polling task, pwq->polling_task
DECLARE_WAITQUEUE(dummy_wait, pwq->polling_task);
// Write barrier issued before the triggered flag is stored
// NOTE(review): the matching barrier on the waiting side is not shown in this excerpt - confirm
smp_wmb();
pwq->triggered = 1;// mark as triggered
// Wake the polling task through the temporary entry
return default_wake_function(&dummy_wait, mode, sync, key);
}

/*
 * Default wake function; the callbacks installed by poll/select end up
 * calling it to perform the actual wakeup.
 *
 * Wakes the waiter stored in curr->private by handing it to
 * try_to_wake_up() (per the article: the thread is moved onto a run queue).
 * @key is not used here.
 */
int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
			  void *key)
{
	/* try_to_wake_up() is fairly involved and is not analysed here. */
	void *waiter = curr->private;

	return try_to_wake_up(waiter, mode, wake_flags);
}


参考资料:

  1. linux 内核poll/select/epoll实现剖析(经典)-上