Skip to content

Commit 4fb6ac3

Browse files
committed
io-wq: improve manager/worker handling over exec
exec will cancel any threads, including the ones that io-wq is using. This isn't a problem; in fact, we'd prefer it to be that way, since it means we know that any async work cancels naturally without us having to handle it proactively. But it does mean that we need to set up a new manager, as the manager and workers are gone. Handle this at queue time, and cancel the work if we fail. Since the manager can go away without us noticing, ensure that the manager itself holds a reference to the 'wq' as well. Rename io_wq_destroy() to io_wq_put() to reflect that. In the future we can simplify exec cancelation handling; for now, just leave it the same. Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent eb85890 commit 4fb6ac3

3 files changed

Lines changed: 45 additions & 23 deletions

File tree

fs/io-wq.c

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ static void io_worker_exit(struct io_worker *worker)
189189
raw_spin_unlock_irq(&wqe->lock);
190190

191191
kfree_rcu(worker, rcu);
192-
if (refcount_dec_and_test(&wqe->wq->refs))
193-
complete(&wqe->wq->done);
192+
io_wq_put(wqe->wq);
194193
}
195194

196195
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -654,8 +653,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
654653
else
655654
pid = io_wq_fork_thread(task_thread_unbound, worker);
656655
if (pid < 0) {
657-
if (refcount_dec_and_test(&wq->refs))
658-
complete(&wq->done);
656+
io_wq_put(wq);
659657
kfree(worker);
660658
return false;
661659
}
@@ -754,11 +752,6 @@ static int io_wq_manager(void *data)
754752

755753
io_wq_check_workers(wq);
756754

757-
if (refcount_dec_and_test(&wq->refs)) {
758-
wq->manager = NULL;
759-
complete(&wq->done);
760-
do_exit(0);
761-
}
762755
/* if ERROR is set and we get here, we have workers to wake */
763756
if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
764757
rcu_read_lock();
@@ -767,6 +760,7 @@ static int io_wq_manager(void *data)
767760
rcu_read_unlock();
768761
}
769762
wq->manager = NULL;
763+
io_wq_put(wq);
770764
do_exit(0);
771765
}
772766

@@ -801,12 +795,40 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
801795
wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
802796
}
803797

798+
static int io_wq_fork_manager(struct io_wq *wq)
799+
{
800+
int ret;
801+
802+
if (wq->manager)
803+
return 0;
804+
805+
clear_bit(IO_WQ_BIT_EXIT, &wq->state);
806+
refcount_inc(&wq->refs);
807+
current->flags |= PF_IO_WORKER;
808+
ret = io_wq_fork_thread(io_wq_manager, wq);
809+
current->flags &= ~PF_IO_WORKER;
810+
if (ret >= 0) {
811+
wait_for_completion(&wq->done);
812+
return 0;
813+
}
814+
815+
io_wq_put(wq);
816+
return ret;
817+
}
818+
804819
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
805820
{
806821
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
807822
int work_flags;
808823
unsigned long flags;
809824

825+
/* Can only happen if manager creation fails after exec */
826+
if (unlikely(io_wq_fork_manager(wqe->wq))) {
827+
work->flags |= IO_WQ_WORK_CANCEL;
828+
wqe->wq->do_work(work);
829+
return;
830+
}
831+
810832
work_flags = work->flags;
811833
raw_spin_lock_irqsave(&wqe->lock, flags);
812834
io_wqe_insert_work(wqe, work);
@@ -1034,16 +1056,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
10341056
init_completion(&wq->done);
10351057
refcount_set(&wq->refs, 1);
10361058

1037-
current->flags |= PF_IO_WORKER;
1038-
ret = io_wq_fork_thread(io_wq_manager, wq);
1039-
current->flags &= ~PF_IO_WORKER;
1040-
if (ret >= 0) {
1041-
wait_for_completion(&wq->done);
1059+
ret = io_wq_fork_manager(wq);
1060+
if (!ret)
10421061
return wq;
1043-
}
10441062

1045-
if (refcount_dec_and_test(&wq->refs))
1046-
complete(&wq->done);
1063+
io_wq_put(wq);
10471064
io_wq_put_hash(data->hash);
10481065
err:
10491066
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
@@ -1056,7 +1073,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
10561073
return ERR_PTR(ret);
10571074
}
10581075

1059-
void io_wq_destroy(struct io_wq *wq)
1076+
static void io_wq_destroy(struct io_wq *wq)
10601077
{
10611078
int node;
10621079

@@ -1071,8 +1088,6 @@ void io_wq_destroy(struct io_wq *wq)
10711088
io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
10721089
rcu_read_unlock();
10731090

1074-
wait_for_completion(&wq->done);
1075-
10761091
spin_lock_irq(&wq->hash->wait.lock);
10771092
for_each_node(node) {
10781093
struct io_wqe *wqe = wq->wqes[node];
@@ -1084,6 +1099,13 @@ void io_wq_destroy(struct io_wq *wq)
10841099
io_wq_put_hash(wq->hash);
10851100
kfree(wq->wqes);
10861101
kfree(wq);
1102+
1103+
}
1104+
1105+
void io_wq_put(struct io_wq *wq)
1106+
{
1107+
if (refcount_dec_and_test(&wq->refs))
1108+
io_wq_destroy(wq);
10871109
}
10881110

10891111
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)

fs/io-wq.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ struct io_wq_data {
113113
};
114114

115115
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
116-
void io_wq_destroy(struct io_wq *wq);
116+
void io_wq_put(struct io_wq *wq);
117117

118118
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
119119
void io_wq_hash_work(struct io_wq_work *work, void *val);

fs/io_uring.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2024,7 +2024,7 @@ static void __io_req_task_submit(struct io_kiocb *req)
20242024

20252025
/* ctx stays valid until unlock, even if we drop all ours ctx->refs */
20262026
mutex_lock(&ctx->uring_lock);
2027-
if (!ctx->sqo_dead && !(current->flags & PF_EXITING))
2027+
if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve)
20282028
__io_queue_sqe(req);
20292029
else
20302030
__io_req_task_cancel(req, -EFAULT);
@@ -8821,7 +8821,7 @@ void __io_uring_files_cancel(struct files_struct *files)
88218821
if (files) {
88228822
io_uring_remove_task_files(tctx);
88238823
if (tctx->io_wq) {
8824-
io_wq_destroy(tctx->io_wq);
8824+
io_wq_put(tctx->io_wq);
88258825
tctx->io_wq = NULL;
88268826
}
88278827
}

0 commit comments

Comments
 (0)