Skip to content

Commit cfbe7e8

Browse files
axboe authored and Xiaoguang Wang committed
io_uring: allow POLL_ADD with double poll_wait() users
to #28736503

commit 18bceab upstream

Some file descriptors use separate waitqueues for their f_ops->poll() handler, most commonly one for read and one for write. The io_uring poll implementation doesn't work with that, as the 2nd poll_wait() call will cause the io_uring poll request to -EINVAL. This affects (at least) tty devices and /dev/random as well. This is a big problem for event loops where some file descriptors work, and others don't.

With this fix, io_uring handles multiple waitqueues.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
1 parent d3101fc commit cfbe7e8

File tree

1 file changed

+146
-72
lines changed

1 file changed

+146
-72
lines changed

fs/io_uring.c

Lines changed: 146 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -4035,27 +4035,6 @@ struct io_poll_table {
40354035
int error;
40364036
};
40374037

4038-
/*
 * Pre-patch version (removed by this commit): poll_table queue callback
 * that registers poll->wait on a single waitqueue head.  A second
 * poll_wait() call (poll->head already set) fails the request with
 * -EINVAL -- this is the limitation the patch removes.
 */
static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
4039-
struct wait_queue_head *head)
4040-
{
4041-
/* a head was already registered: reject double poll_wait() users */
if (unlikely(poll->head)) {
4042-
pt->error = -EINVAL;
4043-
return;
4044-
}
4045-
4046-
pt->error = 0;
4047-
poll->head = head;
4048-
add_wait_queue(head, &poll->wait);
4049-
}
4050-
4051-
/*
 * Pre-patch version (removed by this commit): thin poll_table adapter
 * for async (internal) poll -- recovers the io_poll_table from the
 * poll_table_struct and queues the request's apoll entry.
 */
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
4052-
struct poll_table_struct *p)
4053-
{
4054-
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
4055-
4056-
__io_queue_proc(&pt->req->apoll->poll, pt, head);
4057-
}
4058-
40594038
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
40604039
__poll_t mask, task_work_func_t func)
40614040
{
@@ -4109,6 +4088,144 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
41094088
return false;
41104089
}
41114090

4091+
/*
 * Tear down the secondary poll entry, if any.  __io_queue_proc() stashes
 * the second io_poll_iocb in req->io when a file uses two waitqueues;
 * this unhooks it from its waitqueue and drops the extra request
 * reference taken when it was armed.
 */
static void io_poll_remove_double(struct io_kiocb *req)
4092+
{
4093+
/* req->io is (ab)used to carry the secondary io_poll_iocb */
struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
4094+
4095+
/* caller must hold the ring's completion lock */
lockdep_assert_held(&req->ctx->completion_lock);
4096+
4097+
if (poll && poll->head) {
4098+
struct wait_queue_head *head = poll->head;
4099+
4100+
spin_lock(&head->lock);
4101+
list_del_init(&poll->wait.entry);
4102+
/* wait.private still set means the entry still owns a req ref */
if (poll->wait.private)
4103+
refcount_dec(&req->refs);
4104+
poll->head = NULL;
4105+
spin_unlock(&head->lock);
4106+
}
4107+
}
4108+
4109+
/*
 * Post the poll completion CQE for @req.  Unlike the pre-patch version,
 * this first removes any secondary (double) poll entry so it cannot
 * fire after completion.  Called under completion_lock (required by
 * io_poll_remove_double(); see its lockdep assert).
 */
static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
4110+
{
4111+
struct io_ring_ctx *ctx = req->ctx;
4112+
4113+
io_poll_remove_double(req);
4114+
req->poll.done = true;
4115+
/* on error fill the raw error, otherwise the userspace poll mask */
io_cqring_fill_event(req, error ? error : mangle_poll(mask));
4116+
io_commit_cqring(ctx);
4117+
}
4118+
4119+
/*
 * Task-work half of poll completion: finish the request and hand back
 * any linked next request via @nxt.
 *
 * NOTE(review): both paths below unlock completion_lock without taking
 * it, so io_poll_rewait() appears to return with the lock held -- confirm
 * against its definition (outside this hunk).
 */
static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
4120+
{
4121+
struct io_ring_ctx *ctx = req->ctx;
4122+
4123+
/* events were re-armed; nothing to complete yet */
if (io_poll_rewait(req, &req->poll)) {
4124+
spin_unlock_irq(&ctx->completion_lock);
4125+
return;
4126+
}
4127+
4128+
/* drop from the cancel hash before posting the completion */
hash_del(&req->hash_node);
4129+
io_poll_complete(req, req->result, 0);
4130+
req->flags |= REQ_F_COMP_LOCKED;
4131+
io_put_req_find_next(req, nxt);
4132+
spin_unlock_irq(&ctx->completion_lock);
4133+
4134+
/* wake CQ waiters only after dropping the lock */
io_cqring_ev_posted(ctx);
4135+
}
4136+
4137+
/*
 * task_work callback for poll completion.  Completes the request, then
 * issues any linked next request under uring_lock, since task_work runs
 * in task context without it held.
 */
static void io_poll_task_func(struct callback_head *cb)
4138+
{
4139+
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
4140+
struct io_kiocb *nxt = NULL;
4141+
4142+
io_poll_task_handler(req, &nxt);
4143+
if (nxt) {
4144+
struct io_ring_ctx *ctx = nxt->ctx;
4145+
4146+
mutex_lock(&ctx->uring_lock);
4147+
__io_queue_sqe(nxt, NULL);
4148+
mutex_unlock(&ctx->uring_lock);
4149+
}
4150+
}
4151+
4152+
/*
 * Wakeup callback for the SECONDARY poll entry (the one allocated in
 * __io_queue_proc() for files with two waitqueues).  If the primary
 * entry is still queued, dequeue it and complete the request via
 * task_work; either way, drop this entry's reference on the request.
 */
static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
4153+
int sync, void *key)
4154+
{
4155+
struct io_kiocb *req = wait->private;
4156+
/* the secondary io_poll_iocb lives in req->io */
struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
4157+
__poll_t mask = key_to_poll(key);
4158+
4159+
/* for instances that support it check for an event match first: */
4160+
if (mask && !(mask & poll->events))
4161+
return 0;
4162+
4163+
if (req->poll.head) {
4164+
bool done;
4165+
4166+
spin_lock(&req->poll.head->lock);
4167+
/* primary already dequeued => someone else is completing it */
done = list_empty(&req->poll.wait.entry);
4168+
if (!done)
4169+
list_del_init(&req->poll.wait.entry);
4170+
spin_unlock(&req->poll.head->lock);
4171+
if (!done)
4172+
__io_async_wake(req, poll, mask, io_poll_task_func);
4173+
}
4174+
/* this secondary entry's reference on req is no longer needed */
refcount_dec(&req->refs);
4175+
return 1;
4176+
}
4177+
4178+
/*
 * Initialize an io_poll_iocb: clear state, record the event mask and
 * install @wake_func as the waitqueue wakeup callback.  Shared by the
 * primary entry (via __io_arm_poll_handler) and the secondary entry
 * (via __io_queue_proc with io_poll_double_wake).
 */
static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
4179+
wait_queue_func_t wake_func)
4180+
{
4181+
poll->head = NULL;
4182+
poll->done = false;
4183+
poll->canceled = false;
4184+
poll->events = events;
4185+
INIT_LIST_HEAD(&poll->wait.entry);
4186+
init_waitqueue_func_entry(&poll->wait, wake_func);
4187+
}
4188+
4189+
/*
 * Post-patch poll_table queue callback.  First poll_wait(): register
 * @poll on @head.  Second poll_wait() (poll->head already set): instead
 * of failing with -EINVAL as before, allocate a secondary io_poll_iocb,
 * stash it in req->io, and register that one.  A third waitqueue is
 * still rejected.
 */
static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
4190+
struct wait_queue_head *head)
4191+
{
4192+
struct io_kiocb *req = pt->req;
4193+
4194+
/*
4195+
* If poll->head is already set, it's because the file being polled
4196+
* uses multiple waitqueues for poll handling (eg one for read, one
4197+
* for write). Setup a separate io_poll_iocb if this happens.
4198+
*/
4199+
if (unlikely(poll->head)) {
4200+
/* already have a 2nd entry, fail a third attempt */
4201+
if (req->io) {
4202+
pt->error = -EINVAL;
4203+
return;
4204+
}
4205+
/* GFP_ATOMIC: this runs from vfs_poll(), potentially atomic ctx */
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
4206+
if (!poll) {
4207+
pt->error = -ENOMEM;
4208+
return;
4209+
}
4210+
io_init_poll_iocb(poll, req->poll.events, io_poll_double_wake);
4211+
/* extra ref owned by the secondary entry; dropped on wake/remove */
refcount_inc(&req->refs);
4212+
poll->wait.private = req;
4213+
req->io = (void *) poll;
4214+
}
4215+
4216+
pt->error = 0;
4217+
poll->head = head;
4218+
add_wait_queue(head, &poll->wait);
4219+
}
4220+
4221+
/*
 * poll_table adapter for internal async poll: recover the io_poll_table
 * and queue the request's apoll entry on @head.  Unchanged in behavior,
 * only relocated below the new double-poll helpers.
 */
static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
4222+
struct poll_table_struct *p)
4223+
{
4224+
struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
4225+
4226+
__io_queue_proc(&pt->req->apoll->poll, pt, head);
4227+
}
4228+
41124229
static void io_async_task_func(struct callback_head *cb)
41134230
{
41144231
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
@@ -4183,18 +4300,13 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
41834300
bool cancel = false;
41844301

41854302
poll->file = req->file;
4186-
poll->head = NULL;
4187-
poll->done = poll->canceled = false;
4188-
poll->events = mask;
4303+
io_init_poll_iocb(poll, mask, wake_func);
4304+
poll->wait.private = req;
41894305

41904306
ipt->pt._key = mask;
41914307
ipt->req = req;
41924308
ipt->error = -EINVAL;
41934309

4194-
INIT_LIST_HEAD(&poll->wait.entry);
4195-
init_waitqueue_func_entry(&poll->wait, wake_func);
4196-
poll->wait.private = req;
4197-
41984310
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
41994311

42004312
spin_lock_irq(&ctx->completion_lock);
@@ -4225,6 +4337,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
42254337
struct async_poll *apoll;
42264338
struct io_poll_table ipt;
42274339
__poll_t mask, ret;
4340+
bool had_io;
42284341

42294342
if (!req->file || !file_can_poll(req->file))
42304343
return false;
@@ -4239,6 +4352,7 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
42394352

42404353
req->flags |= REQ_F_POLLED;
42414354
memcpy(&apoll->work, &req->work, sizeof(req->work));
4355+
had_io = req->io != NULL;
42424356

42434357
get_task_struct(current);
42444358
req->task = current;
@@ -4258,7 +4372,9 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
42584372
io_async_wake);
42594373
if (ret) {
42604374
ipt.error = 0;
4261-
apoll->poll.done = true;
4375+
/* only remove double add if we did it here */
4376+
if (!had_io)
4377+
io_poll_remove_double(req);
42624378
spin_unlock_irq(&ctx->completion_lock);
42634379
memcpy(&req->work, &apoll->work, sizeof(req->work));
42644380
kfree(apoll);
@@ -4291,6 +4407,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
42914407
bool do_complete;
42924408

42934409
if (req->opcode == IORING_OP_POLL_ADD) {
4410+
io_poll_remove_double(req);
42944411
do_complete = __io_poll_remove_one(req, &req->poll);
42954412
} else {
42964413
struct async_poll *apoll = req->apoll;
@@ -4391,49 +4508,6 @@ static int io_poll_remove(struct io_kiocb *req)
43914508
return 0;
43924509
}
43934510

4394-
static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
4395-
{
4396-
struct io_ring_ctx *ctx = req->ctx;
4397-
4398-
req->poll.done = true;
4399-
io_cqring_fill_event(req, error ? error : mangle_poll(mask));
4400-
io_commit_cqring(ctx);
4401-
}
4402-
4403-
/*
 * Pre-patch version at its old location (removed by this commit):
 * functionally identical to the relocated copy; it was moved earlier in
 * the file so io_poll_double_wake() can reference io_poll_task_func().
 */
static void io_poll_task_handler(struct io_kiocb *req, struct io_kiocb **nxt)
4404-
{
4405-
struct io_ring_ctx *ctx = req->ctx;
4406-
struct io_poll_iocb *poll = &req->poll;
4407-
4408-
if (io_poll_rewait(req, poll)) {
4409-
spin_unlock_irq(&ctx->completion_lock);
4410-
return;
4411-
}
4412-
4413-
hash_del(&req->hash_node);
4414-
io_poll_complete(req, req->result, 0);
4415-
req->flags |= REQ_F_COMP_LOCKED;
4416-
io_put_req_find_next(req, nxt);
4417-
spin_unlock_irq(&ctx->completion_lock);
4418-
4419-
io_cqring_ev_posted(ctx);
4420-
}
4421-
4422-
/*
 * Pre-patch version at its old location (removed by this commit):
 * byte-for-byte the same logic as the relocated io_poll_task_func().
 */
static void io_poll_task_func(struct callback_head *cb)
4423-
{
4424-
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
4425-
struct io_kiocb *nxt = NULL;
4426-
4427-
io_poll_task_handler(req, &nxt);
4428-
if (nxt) {
4429-
struct io_ring_ctx *ctx = nxt->ctx;
4430-
4431-
mutex_lock(&ctx->uring_lock);
4432-
__io_queue_sqe(nxt, NULL);
4433-
mutex_unlock(&ctx->uring_lock);
4434-
}
4435-
}
4436-
44374511
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
44384512
void *key)
44394513
{

0 commit comments

Comments
 (0)