Skip to content

Commit dfe48ea

Browse files
hailan94axboe
authored andcommitted
blk-mq: use NOIO context to prevent deadlock during debugfs creation
Creating debugfs entries can trigger fs reclaim, which can enter back into the block layer request_queue. This can cause deadlock if the queue is frozen. Previously, a WARN_ON_ONCE check was used in debugfs_create_files() to detect this condition, but it was racy since the queue can be frozen from another context at any time. Introduce blk_debugfs_lock()/blk_debugfs_unlock() helpers that combine the debugfs_mutex with memalloc_noio_save()/restore() to prevent fs reclaim from triggering block I/O. Also add blk_debugfs_lock_nomemsave() and blk_debugfs_unlock_nomemrestore() variants for callers that don't need NOIO protection (e.g., debugfs removal or read-only operations). Replace all raw debugfs_mutex lock/unlock pairs with these helpers, using the _nomemsave/_nomemrestore variants where appropriate. Reported-by: Yi Zhang <yi.zhang@redhat.com> Closes: https://lore.kernel.org/all/CAHj4cs9gNKEYAPagD9JADfO5UH+OiCr4P7OO2wjpfOYeM-RV=A@mail.gmail.com/ Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com> Closes: https://lore.kernel.org/all/aYWQR7CtYdk3K39g@shinmob/ Suggested-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Yu Kuai <yukuai@fnnas.com> Reviewed-by: Nilay Shroff <nilay@linux.ibm.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 3678a33 commit dfe48ea

6 files changed

Lines changed: 71 additions & 36 deletions

File tree

block/blk-mq-debugfs.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -613,11 +613,6 @@ static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
613613
const struct blk_mq_debugfs_attr *attr)
614614
{
615615
lockdep_assert_held(&q->debugfs_mutex);
616-
/*
617-
* Creating new debugfs entries with queue freezed has the risk of
618-
* deadlock.
619-
*/
620-
WARN_ON_ONCE(q->mq_freeze_depth != 0);
621616
/*
622617
* debugfs_mutex should not be nested under other locks that can be
623618
* grabbed while queue is frozen.
@@ -693,12 +688,13 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
693688
void blk_mq_debugfs_register_hctxs(struct request_queue *q)
694689
{
695690
struct blk_mq_hw_ctx *hctx;
691+
unsigned int memflags;
696692
unsigned long i;
697693

698-
mutex_lock(&q->debugfs_mutex);
694+
memflags = blk_debugfs_lock(q);
699695
queue_for_each_hw_ctx(q, hctx, i)
700696
blk_mq_debugfs_register_hctx(q, hctx);
701-
mutex_unlock(&q->debugfs_mutex);
697+
blk_debugfs_unlock(q, memflags);
702698
}
703699

704700
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)

block/blk-mq-sched.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -390,25 +390,26 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
390390
void blk_mq_sched_reg_debugfs(struct request_queue *q)
391391
{
392392
struct blk_mq_hw_ctx *hctx;
393+
unsigned int memflags;
393394
unsigned long i;
394395

395-
mutex_lock(&q->debugfs_mutex);
396+
memflags = blk_debugfs_lock(q);
396397
blk_mq_debugfs_register_sched(q);
397398
queue_for_each_hw_ctx(q, hctx, i)
398399
blk_mq_debugfs_register_sched_hctx(q, hctx);
399-
mutex_unlock(&q->debugfs_mutex);
400+
blk_debugfs_unlock(q, memflags);
400401
}
401402

402403
void blk_mq_sched_unreg_debugfs(struct request_queue *q)
403404
{
404405
struct blk_mq_hw_ctx *hctx;
405406
unsigned long i;
406407

407-
mutex_lock(&q->debugfs_mutex);
408+
blk_debugfs_lock_nomemsave(q);
408409
queue_for_each_hw_ctx(q, hctx, i)
409410
blk_mq_debugfs_unregister_sched_hctx(hctx);
410411
blk_mq_debugfs_unregister_sched(q);
411-
mutex_unlock(&q->debugfs_mutex);
412+
blk_debugfs_unlock_nomemrestore(q);
412413
}
413414

414415
void blk_mq_free_sched_tags(struct elevator_tags *et,

block/blk-sysfs.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -892,13 +892,13 @@ static void blk_debugfs_remove(struct gendisk *disk)
892892
{
893893
struct request_queue *q = disk->queue;
894894

895-
mutex_lock(&q->debugfs_mutex);
895+
blk_debugfs_lock_nomemsave(q);
896896
blk_trace_shutdown(q);
897897
debugfs_remove_recursive(q->debugfs_dir);
898898
q->debugfs_dir = NULL;
899899
q->sched_debugfs_dir = NULL;
900900
q->rqos_debugfs_dir = NULL;
901-
mutex_unlock(&q->debugfs_mutex);
901+
blk_debugfs_unlock_nomemrestore(q);
902902
}
903903

904904
/**
@@ -908,6 +908,7 @@ static void blk_debugfs_remove(struct gendisk *disk)
908908
int blk_register_queue(struct gendisk *disk)
909909
{
910910
struct request_queue *q = disk->queue;
911+
unsigned int memflags;
911912
int ret;
912913

913914
ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
@@ -921,11 +922,11 @@ int blk_register_queue(struct gendisk *disk)
921922
}
922923
mutex_lock(&q->sysfs_lock);
923924

924-
mutex_lock(&q->debugfs_mutex);
925+
memflags = blk_debugfs_lock(q);
925926
q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
926927
if (queue_is_mq(q))
927928
blk_mq_debugfs_register(q);
928-
mutex_unlock(&q->debugfs_mutex);
929+
blk_debugfs_unlock(q, memflags);
929930

930931
ret = disk_register_independent_access_ranges(disk);
931932
if (ret)

block/blk-wbt.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,7 @@ void wbt_init_enable_default(struct gendisk *disk)
776776
{
777777
struct request_queue *q = disk->queue;
778778
struct rq_wb *rwb;
779+
unsigned int memflags;
779780

780781
if (!__wbt_enable_default(disk))
781782
return;
@@ -789,9 +790,9 @@ void wbt_init_enable_default(struct gendisk *disk)
789790
return;
790791
}
791792

792-
mutex_lock(&q->debugfs_mutex);
793+
memflags = blk_debugfs_lock(q);
793794
blk_mq_debugfs_register_rq_qos(q);
794-
mutex_unlock(&q->debugfs_mutex);
795+
blk_debugfs_unlock(q, memflags);
795796
}
796797

797798
static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -1015,9 +1016,10 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
10151016
blk_mq_unquiesce_queue(q);
10161017
out:
10171018
blk_mq_unfreeze_queue(q, memflags);
1018-
mutex_lock(&q->debugfs_mutex);
1019+
1020+
memflags = blk_debugfs_lock(q);
10191021
blk_mq_debugfs_register_rq_qos(q);
1020-
mutex_unlock(&q->debugfs_mutex);
1022+
blk_debugfs_unlock(q, memflags);
10211023

10221024
return ret;
10231025
}

block/blk.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,4 +729,35 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q)
729729
}
730730
#endif
731731

732+
/*
733+
* debugfs directory and file creation can trigger fs reclaim, which can enter
734+
* back into the block layer request_queue. This can cause deadlock if the
735+
* queue is frozen. Use NOIO context together with debugfs_mutex to prevent fs
736+
* reclaim from triggering block I/O.
737+
*/
738+
static inline void blk_debugfs_lock_nomemsave(struct request_queue *q)
739+
{
740+
mutex_lock(&q->debugfs_mutex);
741+
}
742+
743+
static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q)
744+
{
745+
mutex_unlock(&q->debugfs_mutex);
746+
}
747+
748+
static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q)
749+
{
750+
unsigned int memflags = memalloc_noio_save();
751+
752+
blk_debugfs_lock_nomemsave(q);
753+
return memflags;
754+
}
755+
756+
static inline void blk_debugfs_unlock(struct request_queue *q,
757+
unsigned int memflags)
758+
{
759+
blk_debugfs_unlock_nomemrestore(q);
760+
memalloc_noio_restore(memflags);
761+
}
762+
732763
#endif /* BLK_INTERNAL_H */

kernel/trace/blktrace.c

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -559,9 +559,9 @@ int blk_trace_remove(struct request_queue *q)
559559
{
560560
int ret;
561561

562-
mutex_lock(&q->debugfs_mutex);
562+
blk_debugfs_lock_nomemsave(q);
563563
ret = __blk_trace_remove(q);
564-
mutex_unlock(&q->debugfs_mutex);
564+
blk_debugfs_unlock_nomemrestore(q);
565565

566566
return ret;
567567
}
@@ -767,6 +767,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
767767
struct blk_user_trace_setup2 buts2;
768768
struct blk_user_trace_setup buts;
769769
struct blk_trace *bt;
770+
unsigned int memflags;
770771
int ret;
771772

772773
ret = copy_from_user(&buts, arg, sizeof(buts));
@@ -785,16 +786,16 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
785786
.pid = buts.pid,
786787
};
787788

788-
mutex_lock(&q->debugfs_mutex);
789+
memflags = blk_debugfs_lock(q);
789790
bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr,
790791
bdev);
791792
if (IS_ERR(bt)) {
792-
mutex_unlock(&q->debugfs_mutex);
793+
blk_debugfs_unlock(q, memflags);
793794
return PTR_ERR(bt);
794795
}
795796
blk_trace_setup_finalize(q, name, 1, bt, &buts2);
796797
strscpy(buts.name, buts2.name, BLKTRACE_BDEV_SIZE);
797-
mutex_unlock(&q->debugfs_mutex);
798+
blk_debugfs_unlock(q, memflags);
798799

799800
if (copy_to_user(arg, &buts, sizeof(buts))) {
800801
blk_trace_remove(q);
@@ -809,6 +810,7 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
809810
{
810811
struct blk_user_trace_setup2 buts2;
811812
struct blk_trace *bt;
813+
unsigned int memflags;
812814

813815
if (copy_from_user(&buts2, arg, sizeof(buts2)))
814816
return -EFAULT;
@@ -819,15 +821,15 @@ static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev,
819821
if (buts2.flags != 0)
820822
return -EINVAL;
821823

822-
mutex_lock(&q->debugfs_mutex);
824+
memflags = blk_debugfs_lock(q);
823825
bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
824826
bdev);
825827
if (IS_ERR(bt)) {
826-
mutex_unlock(&q->debugfs_mutex);
828+
blk_debugfs_unlock(q, memflags);
827829
return PTR_ERR(bt);
828830
}
829831
blk_trace_setup_finalize(q, name, 2, bt, &buts2);
830-
mutex_unlock(&q->debugfs_mutex);
832+
blk_debugfs_unlock(q, memflags);
831833

832834
if (copy_to_user(arg, &buts2, sizeof(buts2))) {
833835
blk_trace_remove(q);
@@ -844,6 +846,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
844846
struct blk_user_trace_setup2 buts2;
845847
struct compat_blk_user_trace_setup cbuts;
846848
struct blk_trace *bt;
849+
unsigned int memflags;
847850

848851
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
849852
return -EFAULT;
@@ -860,15 +863,15 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
860863
.pid = cbuts.pid,
861864
};
862865

863-
mutex_lock(&q->debugfs_mutex);
866+
memflags = blk_debugfs_lock(q);
864867
bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr,
865868
bdev);
866869
if (IS_ERR(bt)) {
867-
mutex_unlock(&q->debugfs_mutex);
870+
blk_debugfs_unlock(q, memflags);
868871
return PTR_ERR(bt);
869872
}
870873
blk_trace_setup_finalize(q, name, 1, bt, &buts2);
871-
mutex_unlock(&q->debugfs_mutex);
874+
blk_debugfs_unlock(q, memflags);
872875

873876
if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) {
874877
blk_trace_remove(q);
@@ -898,9 +901,9 @@ int blk_trace_startstop(struct request_queue *q, int start)
898901
{
899902
int ret;
900903

901-
mutex_lock(&q->debugfs_mutex);
904+
blk_debugfs_lock_nomemsave(q);
902905
ret = __blk_trace_startstop(q, start);
903-
mutex_unlock(&q->debugfs_mutex);
906+
blk_debugfs_unlock_nomemrestore(q);
904907

905908
return ret;
906909
}
@@ -2020,7 +2023,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
20202023
struct blk_trace *bt;
20212024
ssize_t ret = -ENXIO;
20222025

2023-
mutex_lock(&q->debugfs_mutex);
2026+
blk_debugfs_lock_nomemsave(q);
20242027

20252028
bt = rcu_dereference_protected(q->blk_trace,
20262029
lockdep_is_held(&q->debugfs_mutex));
@@ -2041,7 +2044,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
20412044
ret = sprintf(buf, "%llu\n", bt->end_lba);
20422045

20432046
out_unlock_bdev:
2044-
mutex_unlock(&q->debugfs_mutex);
2047+
blk_debugfs_unlock_nomemrestore(q);
20452048
return ret;
20462049
}
20472050

@@ -2052,6 +2055,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
20522055
struct block_device *bdev = dev_to_bdev(dev);
20532056
struct request_queue *q = bdev_get_queue(bdev);
20542057
struct blk_trace *bt;
2058+
unsigned int memflags;
20552059
u64 value;
20562060
ssize_t ret = -EINVAL;
20572061

@@ -2071,7 +2075,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
20712075
goto out;
20722076
}
20732077

2074-
mutex_lock(&q->debugfs_mutex);
2078+
memflags = blk_debugfs_lock(q);
20752079

20762080
bt = rcu_dereference_protected(q->blk_trace,
20772081
lockdep_is_held(&q->debugfs_mutex));
@@ -2106,7 +2110,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
21062110
}
21072111

21082112
out_unlock_bdev:
2109-
mutex_unlock(&q->debugfs_mutex);
2113+
blk_debugfs_unlock(q, memflags);
21102114
out:
21112115
return ret ? ret : count;
21122116
}

0 commit comments

Comments
 (0)