Skip to content

Commit ebaeabf

Browse files
committed
Merge tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull writeback updates from Christian Brauner:
 "Features:

   - Allow file systems to increase the minimum writeback chunk size.

     The relatively low minimal writeback size of 4MiB means that
     written back inodes on rotational media are switched a lot. Besides
     introducing additional seeks, this also can lead to extreme file
     fragmentation on zoned devices when a lot of files are cached
     relative to the available writeback bandwidth.

     This adds a superblock field that allows the file system to
     override the default size, and sets it to the zone size for zoned
     XFS.

   - Add logging for slow writeback when it exceeds
     sysctl_hung_task_timeout_secs. This helps identify tasks waiting
     for a long time and pinpoint potential issues. Recording the
     starting jiffies is also useful when debugging a crashed vmcore.

   - Wake up waiting tasks when finishing the writeback of a chunk

  Cleanups:

   - filemap_* writeback interface cleanups.

     Adding filemap_fdatawrite_wbc ended up being a mistake, as all but
     the original btrfs caller should be using better high level
     interfaces instead.

     This series removes all these low-level interfaces, switches btrfs
     to a more specific interface, and cleans up other too low-level
     interfaces. With this the writeback_control that is passed to the
     writeback code is only initialized in three places.

   - Remove __filemap_fdatawrite, __filemap_fdatawrite_range, and
     filemap_fdatawrite_wbc

   - Add filemap_flush_nr helper for btrfs

   - Push struct writeback_control into start_delalloc_inodes in btrfs

   - Rename filemap_fdatawrite_range_kick to filemap_flush_range

   - Stop opencoding filemap_fdatawrite_range in 9p, ocfs2, and mm

   - Make wbc_to_tag() inline and use it in fs"

* tag 'vfs-6.19-rc1.writeback' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs: Make wbc_to_tag() inline and use it in fs.
  xfs: set s_min_writeback_pages for zoned file systems
  writeback: allow the file system to override MIN_WRITEBACK_PAGES
  writeback: cleanup writeback_chunk_size
  mm: rename filemap_fdatawrite_range_kick to filemap_flush_range
  mm: remove __filemap_fdatawrite_range
  mm: remove filemap_fdatawrite_wbc
  mm: remove __filemap_fdatawrite
  mm,btrfs: add a filemap_flush_nr helper
  btrfs: push struct writeback_control into start_delalloc_inodes
  btrfs: use the local tmp_inode variable in start_delalloc_inodes
  ocfs2: don't opencode filemap_fdatawrite_range in ocfs2_journal_submit_inode_data_buffers
  9p: don't opencode filemap_fdatawrite_range in v9fs_mmap_vm_close
  mm: don't opencode filemap_fdatawrite_range in filemap_invalidate_inode
  writeback: Add logging for slow writeback (exceeds sysctl_hung_task_timeout_secs)
  writeback: Wake up waiting tasks when finishing the writeback of a chunk.
2 parents 9368f0f + 4952f35 commit ebaeabf

19 files changed

Lines changed: 154 additions & 184 deletions

File tree

fs/9p/vfs_file.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -483,24 +483,15 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
483483

484484
static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
485485
{
486-
struct inode *inode;
487-
488-
struct writeback_control wbc = {
489-
.nr_to_write = LONG_MAX,
490-
.sync_mode = WB_SYNC_ALL,
491-
.range_start = (loff_t)vma->vm_pgoff * PAGE_SIZE,
492-
/* absolute end, byte at end included */
493-
.range_end = (loff_t)vma->vm_pgoff * PAGE_SIZE +
494-
(vma->vm_end - vma->vm_start - 1),
495-
};
496-
497486
if (!(vma->vm_flags & VM_SHARED))
498487
return;
499488

500489
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);
501490

502-
inode = file_inode(vma->vm_file);
503-
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
491+
filemap_fdatawrite_range(file_inode(vma->vm_file)->i_mapping,
492+
(loff_t)vma->vm_pgoff * PAGE_SIZE,
493+
(loff_t)vma->vm_pgoff * PAGE_SIZE +
494+
(vma->vm_end - vma->vm_start - 1));
504495
}
505496

506497
static const struct vm_operations_struct v9fs_mmap_file_vm_ops = {

fs/btrfs/extent_io.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2468,10 +2468,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
24682468
&BTRFS_I(inode)->runtime_flags))
24692469
wbc->tagged_writepages = 1;
24702470

2471-
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2472-
tag = PAGECACHE_TAG_TOWRITE;
2473-
else
2474-
tag = PAGECACHE_TAG_DIRTY;
2471+
tag = wbc_to_tag(wbc);
24752472
retry:
24762473
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
24772474
tag_pages_for_writeback(mapping, index, end);

fs/btrfs/inode.c

Lines changed: 14 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -8715,15 +8715,13 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
87158715
* some fairly slow code that needs optimization. This walks the list
87168716
* of all the inodes with pending delalloc and forces them to disk.
87178717
*/
8718-
static int start_delalloc_inodes(struct btrfs_root *root,
8719-
struct writeback_control *wbc, bool snapshot,
8720-
bool in_reclaim_context)
8718+
static int start_delalloc_inodes(struct btrfs_root *root, long *nr_to_write,
8719+
bool snapshot, bool in_reclaim_context)
87218720
{
87228721
struct btrfs_delalloc_work *work, *next;
87238722
LIST_HEAD(works);
87248723
LIST_HEAD(splice);
87258724
int ret = 0;
8726-
bool full_flush = wbc->nr_to_write == LONG_MAX;
87278725

87288726
mutex_lock(&root->delalloc_mutex);
87298727
spin_lock(&root->delalloc_lock);
@@ -8749,20 +8747,22 @@ static int start_delalloc_inodes(struct btrfs_root *root,
87498747

87508748
if (snapshot)
87518749
set_bit(BTRFS_INODE_SNAPSHOT_FLUSH, &inode->runtime_flags);
8752-
if (full_flush) {
8753-
work = btrfs_alloc_delalloc_work(&inode->vfs_inode);
8750+
if (nr_to_write == NULL) {
8751+
work = btrfs_alloc_delalloc_work(tmp_inode);
87548752
if (!work) {
8755-
iput(&inode->vfs_inode);
8753+
iput(tmp_inode);
87568754
ret = -ENOMEM;
87578755
goto out;
87588756
}
87598757
list_add_tail(&work->list, &works);
87608758
btrfs_queue_work(root->fs_info->flush_workers,
87618759
&work->work);
87628760
} else {
8763-
ret = filemap_fdatawrite_wbc(inode->vfs_inode.i_mapping, wbc);
8761+
ret = filemap_flush_nr(tmp_inode->i_mapping,
8762+
nr_to_write);
87648763
btrfs_add_delayed_iput(inode);
8765-
if (ret || wbc->nr_to_write <= 0)
8764+
8765+
if (ret || *nr_to_write <= 0)
87668766
goto out;
87678767
}
87688768
cond_resched();
@@ -8788,29 +8788,17 @@ static int start_delalloc_inodes(struct btrfs_root *root,
87888788

87898789
int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
87908790
{
8791-
struct writeback_control wbc = {
8792-
.nr_to_write = LONG_MAX,
8793-
.sync_mode = WB_SYNC_NONE,
8794-
.range_start = 0,
8795-
.range_end = LLONG_MAX,
8796-
};
87978791
struct btrfs_fs_info *fs_info = root->fs_info;
87988792

87998793
if (BTRFS_FS_ERROR(fs_info))
88008794
return -EROFS;
8801-
8802-
return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
8795+
return start_delalloc_inodes(root, NULL, true, in_reclaim_context);
88038796
}
88048797

88058798
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
88068799
bool in_reclaim_context)
88078800
{
8808-
struct writeback_control wbc = {
8809-
.nr_to_write = nr,
8810-
.sync_mode = WB_SYNC_NONE,
8811-
.range_start = 0,
8812-
.range_end = LLONG_MAX,
8813-
};
8801+
long *nr_to_write = nr == LONG_MAX ? NULL : &nr;
88148802
struct btrfs_root *root;
88158803
LIST_HEAD(splice);
88168804
int ret;
@@ -8822,13 +8810,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
88228810
spin_lock(&fs_info->delalloc_root_lock);
88238811
list_splice_init(&fs_info->delalloc_roots, &splice);
88248812
while (!list_empty(&splice)) {
8825-
/*
8826-
* Reset nr_to_write here so we know that we're doing a full
8827-
* flush.
8828-
*/
8829-
if (nr == LONG_MAX)
8830-
wbc.nr_to_write = LONG_MAX;
8831-
88328813
root = list_first_entry(&splice, struct btrfs_root,
88338814
delalloc_root);
88348815
root = btrfs_grab_root(root);
@@ -8837,9 +8818,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
88378818
&fs_info->delalloc_roots);
88388819
spin_unlock(&fs_info->delalloc_root_lock);
88398820

8840-
ret = start_delalloc_inodes(root, &wbc, false, in_reclaim_context);
8821+
ret = start_delalloc_inodes(root, nr_to_write, false,
8822+
in_reclaim_context);
88418823
btrfs_put_root(root);
8842-
if (ret < 0 || wbc.nr_to_write <= 0)
8824+
if (ret < 0 || nr <= 0)
88438825
goto out;
88448826
spin_lock(&fs_info->delalloc_root_lock);
88458827
}

fs/ceph/addr.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,11 +1045,7 @@ void ceph_init_writeback_ctl(struct address_space *mapping,
10451045
ceph_wbc->index = ceph_wbc->start_index;
10461046
ceph_wbc->end = -1;
10471047

1048-
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) {
1049-
ceph_wbc->tag = PAGECACHE_TAG_TOWRITE;
1050-
} else {
1051-
ceph_wbc->tag = PAGECACHE_TAG_DIRTY;
1052-
}
1048+
ceph_wbc->tag = wbc_to_tag(wbc);
10531049

10541050
ceph_wbc->op_idx = -1;
10551051
ceph_wbc->num_ops = 0;

fs/ext4/inode.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2618,10 +2618,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
26182618
handle_t *handle = NULL;
26192619
int bpp = ext4_journal_blocks_per_folio(mpd->inode);
26202620

2621-
if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
2622-
tag = PAGECACHE_TAG_TOWRITE;
2623-
else
2624-
tag = PAGECACHE_TAG_DIRTY;
2621+
tag = wbc_to_tag(mpd->wbc);
26252622

26262623
mpd->map.m_len = 0;
26272624
mpd->next_pos = mpd->start_pos;

fs/f2fs/data.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2986,10 +2986,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
29862986
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
29872987
range_whole = 1;
29882988
}
2989-
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2990-
tag = PAGECACHE_TAG_TOWRITE;
2991-
else
2992-
tag = PAGECACHE_TAG_DIRTY;
2989+
tag = wbc_to_tag(wbc);
29932990
retry:
29942991
retry = 0;
29952992
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)

fs/fs-writeback.c

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
* Additions for address_space-based writeback
1515
*/
1616

17+
#include <linux/sched/sysctl.h>
1718
#include <linux/kernel.h>
1819
#include <linux/export.h>
1920
#include <linux/spinlock.h>
@@ -31,11 +32,6 @@
3132
#include <linux/memcontrol.h>
3233
#include "internal.h"
3334

34-
/*
35-
* 4MB minimal write chunk size
36-
*/
37-
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
38-
3935
/*
4036
* Passed into wb_writeback(), essentially a subset of writeback_control
4137
*/
@@ -200,6 +196,19 @@ static void wb_queue_work(struct bdi_writeback *wb,
200196
spin_unlock_irq(&wb->work_lock);
201197
}
202198

199+
static bool wb_wait_for_completion_cb(struct wb_completion *done)
200+
{
201+
unsigned long waited_secs = (jiffies - done->wait_start) / HZ;
202+
203+
done->progress_stamp = jiffies;
204+
if (waited_secs > sysctl_hung_task_timeout_secs)
205+
pr_info("INFO: The task %s:%d has been waiting for writeback "
206+
"completion for more than %lu seconds.",
207+
current->comm, current->pid, waited_secs);
208+
209+
return !atomic_read(&done->cnt);
210+
}
211+
203212
/**
204213
* wb_wait_for_completion - wait for completion of bdi_writeback_works
205214
* @done: target wb_completion
@@ -212,8 +221,9 @@ static void wb_queue_work(struct bdi_writeback *wb,
212221
*/
213222
void wb_wait_for_completion(struct wb_completion *done)
214223
{
224+
done->wait_start = jiffies;
215225
atomic_dec(&done->cnt); /* put down the initial count */
216-
wait_event(*done->waitq, !atomic_read(&done->cnt));
226+
wait_event(*done->waitq, wb_wait_for_completion_cb(done));
217227
}
218228

219229
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -808,9 +818,9 @@ static void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
808818
* @wbc: writeback_control of interest
809819
* @inode: target inode
810820
*
811-
* This function is to be used by __filemap_fdatawrite_range(), which is an
812-
* alternative entry point into writeback code, and first ensures @inode is
813-
* associated with a bdi_writeback and attaches it to @wbc.
821+
* This function is to be used by filemap_writeback(), which is an alternative
822+
* entry point into writeback code, and first ensures @inode is associated with
823+
* a bdi_writeback and attaches it to @wbc.
814824
*/
815825
void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
816826
struct inode *inode)
@@ -1882,8 +1892,8 @@ static int writeback_single_inode(struct inode *inode,
18821892
return ret;
18831893
}
18841894

1885-
static long writeback_chunk_size(struct bdi_writeback *wb,
1886-
struct wb_writeback_work *work)
1895+
static long writeback_chunk_size(struct super_block *sb,
1896+
struct bdi_writeback *wb, struct wb_writeback_work *work)
18871897
{
18881898
long pages;
18891899

@@ -1901,16 +1911,13 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
19011911
* (maybe slowly) sync all tagged pages
19021912
*/
19031913
if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
1904-
pages = LONG_MAX;
1905-
else {
1906-
pages = min(wb->avg_write_bandwidth / 2,
1907-
global_wb_domain.dirty_limit / DIRTY_SCOPE);
1908-
pages = min(pages, work->nr_pages);
1909-
pages = round_down(pages + MIN_WRITEBACK_PAGES,
1910-
MIN_WRITEBACK_PAGES);
1911-
}
1914+
return LONG_MAX;
19121915

1913-
return pages;
1916+
pages = min(wb->avg_write_bandwidth / 2,
1917+
global_wb_domain.dirty_limit / DIRTY_SCOPE);
1918+
pages = min(pages, work->nr_pages);
1919+
return round_down(pages + sb->s_min_writeback_pages,
1920+
sb->s_min_writeback_pages);
19141921
}
19151922

19161923
/*
@@ -2012,7 +2019,7 @@ static long writeback_sb_inodes(struct super_block *sb,
20122019
inode_state_set(inode, I_SYNC);
20132020
wbc_attach_and_unlock_inode(&wbc, inode);
20142021

2015-
write_chunk = writeback_chunk_size(wb, work);
2022+
write_chunk = writeback_chunk_size(inode->i_sb, wb, work);
20162023
wbc.nr_to_write = write_chunk;
20172024
wbc.pages_skipped = 0;
20182025

@@ -2022,6 +2029,12 @@ static long writeback_sb_inodes(struct super_block *sb,
20222029
*/
20232030
__writeback_single_inode(inode, &wbc);
20242031

2032+
/* Report progress to inform the hung task detector of the progress. */
2033+
if (work->done && work->done->progress_stamp &&
2034+
(jiffies - work->done->progress_stamp) > HZ *
2035+
sysctl_hung_task_timeout_secs / 2)
2036+
wake_up_all(work->done->waitq);
2037+
20252038
wbc_detach_inode(&wbc);
20262039
work->nr_pages -= write_chunk - wbc.nr_to_write;
20272040
wrote = write_chunk - wbc.nr_to_write - wbc.pages_skipped;

fs/gfs2/aops.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -311,10 +311,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
311311
range_whole = 1;
312312
cycled = 1; /* ignore range_cyclic tests */
313313
}
314-
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
315-
tag = PAGECACHE_TAG_TOWRITE;
316-
else
317-
tag = PAGECACHE_TAG_DIRTY;
314+
tag = wbc_to_tag(wbc);
318315

319316
retry:
320317
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)

fs/ocfs2/journal.c

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -902,15 +902,8 @@ int ocfs2_journal_alloc(struct ocfs2_super *osb)
902902

903903
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
904904
{
905-
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
906-
struct writeback_control wbc = {
907-
.sync_mode = WB_SYNC_ALL,
908-
.nr_to_write = mapping->nrpages * 2,
909-
.range_start = jinode->i_dirty_start,
910-
.range_end = jinode->i_dirty_end,
911-
};
912-
913-
return filemap_fdatawrite_wbc(mapping, &wbc);
905+
return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
906+
jinode->i_dirty_start, jinode->i_dirty_end);
914907
}
915908

916909
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)

fs/super.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
389389
goto fail;
390390
if (list_lru_init_memcg(&s->s_inode_lru, s->s_shrink))
391391
goto fail;
392+
s->s_min_writeback_pages = MIN_WRITEBACK_PAGES;
392393
return s;
393394

394395
fail:

0 commit comments

Comments
 (0)