Skip to content

Commit 195a49e

Browse files
fdmanana and kdave
authored and committed
btrfs: fix race between writes to swap files and scrub
When we activate a swap file, at btrfs_swap_activate(), we acquire the exclusive operation lock to prevent the physical location of the swap file extents from being changed by operations such as balance and device replace/resize/remove. There we also call can_nocow_extent() which, among other things, checks if the block group of a swap file extent is currently RO, and if it is, we cannot use the extent, since a write into it would result in COWing the extent.

However, we have no protection against a scrub operation running after we activate the swap file, which can result in the swap file extents being COWed while the scrub is running and operating on the respective block group, because scrub turns a block group into RO mode before it processes it and then back into RW mode after processing it. That means an attempt to write into a swap file extent while scrub is processing the respective block group will result in COWing the extent, changing its physical location on disk.

Fix this by making sure that block groups that have extents used by active swap files cannot be turned into RO mode, therefore making it impossible for a scrub to turn them into RO mode. When a scrub finds a block group that cannot be turned RO due to the existence of extents used by swap files, it proceeds to the next block group and logs a warning message that mentions the block group was skipped due to active swap files - this is the same approach we currently use for balance.

Fixes: ed46ff3 ("Btrfs: support swap files")
CC: stable@vger.kernel.org # 5.4+
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 2090303 commit 195a49e

5 files changed

Lines changed: 72 additions & 3 deletions

File tree

fs/btrfs/block-group.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
11621162
spin_lock(&sinfo->lock);
11631163
spin_lock(&cache->lock);
11641164

1165+
if (cache->swap_extents) {
1166+
ret = -ETXTBSY;
1167+
goto out;
1168+
}
1169+
11651170
if (cache->ro) {
11661171
cache->ro++;
11671172
ret = 0;
@@ -2307,7 +2312,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
23072312
}
23082313

23092314
ret = inc_block_group_ro(cache, 0);
2310-
if (!do_chunk_alloc)
2315+
if (!do_chunk_alloc || ret == -ETXTBSY)
23112316
goto unlock_out;
23122317
if (!ret)
23132318
goto out;
@@ -2316,6 +2321,8 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
23162321
if (ret < 0)
23172322
goto out;
23182323
ret = inc_block_group_ro(cache, 0);
2324+
if (ret == -ETXTBSY)
2325+
goto unlock_out;
23192326
out:
23202327
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
23212328
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
@@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
34063413
ASSERT(list_empty(&block_group->io_list));
34073414
ASSERT(list_empty(&block_group->bg_list));
34083415
ASSERT(refcount_read(&block_group->refs) == 1);
3416+
ASSERT(block_group->swap_extents == 0);
34093417
btrfs_put_block_group(block_group);
34103418

34113419
spin_lock(&info->block_group_cache_lock);
@@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
34723480
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
34733481
}
34743482
}
3483+
3484+
/*
 * Account one more swap file extent against block group @bg.
 *
 * Returns true if the counter was incremented, false if the block group is
 * currently read-only (bg->ro is non-zero), in which case nothing is changed.
 * The check and the increment are done together under bg->lock.
 */
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
3485+
{
3486+
bool ret = true;
3487+
3488+
spin_lock(&bg->lock);
3489+
/* A read-only block group must not gain swap file extents. */
if (bg->ro)
3490+
ret = false;
3491+
else
3492+
bg->swap_extents++;
3493+
spin_unlock(&bg->lock);
3494+
3495+
return ret;
3496+
}
3497+
3498+
/*
 * Drop @amount swap file extents from the count kept in block group @bg.
 *
 * Done under bg->lock. Asserts that the block group is not read-only and
 * that at least @amount swap extents are currently accounted.
 */
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
3499+
{
3500+
spin_lock(&bg->lock);
3501+
ASSERT(!bg->ro);
3502+
/* Guard against the counter going negative. */
ASSERT(bg->swap_extents >= amount);
3503+
bg->swap_extents -= amount;
3504+
spin_unlock(&bg->lock);
3505+
}

fs/btrfs/block-group.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,12 @@ struct btrfs_block_group {
186186
/* Flag indicating this block group is placed on a sequential zone */
187187
bool seq_zone;
188188

189+
/*
190+
* Number of extents in this block group used for swap files.
191+
* All accesses protected by the spinlock 'lock'.
192+
*/
193+
int swap_extents;
194+
189195
/* Record locked full stripes for RAID5/6 block group */
190196
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
191197

@@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
312318
void btrfs_freeze_block_group(struct btrfs_block_group *cache);
313319
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
314320

321+
bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
322+
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
323+
315324
#endif /* BTRFS_BLOCK_GROUP_H */

fs/btrfs/ctree.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,11 @@ struct btrfs_swapfile_pin {
524524
* points to a struct btrfs_device.
525525
*/
526526
bool is_block_group;
527+
/*
528+
* Only used when 'is_block_group' is true and it is the number of
529+
* extents used by a swapfile for this block group ('ptr' field).
530+
*/
531+
int bg_extent_count;
527532
};
528533

529534
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);

fs/btrfs/inode.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10192,6 +10192,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
1019210192
sp->ptr = ptr;
1019310193
sp->inode = inode;
1019410194
sp->is_block_group = is_block_group;
10195+
sp->bg_extent_count = 1;
1019510196

1019610197
spin_lock(&fs_info->swapfile_pins_lock);
1019710198
p = &fs_info->swapfile_pins.rb_node;
@@ -10205,6 +10206,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
1020510206
(sp->ptr == entry->ptr && sp->inode > entry->inode)) {
1020610207
p = &(*p)->rb_right;
1020710208
} else {
10209+
if (is_block_group)
10210+
entry->bg_extent_count++;
1020810211
spin_unlock(&fs_info->swapfile_pins_lock);
1020910212
kfree(sp);
1021010213
return 1;
@@ -10230,8 +10233,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
1023010233
sp = rb_entry(node, struct btrfs_swapfile_pin, node);
1023110234
if (sp->inode == inode) {
1023210235
rb_erase(&sp->node, &fs_info->swapfile_pins);
10233-
if (sp->is_block_group)
10236+
if (sp->is_block_group) {
10237+
btrfs_dec_block_group_swap_extents(sp->ptr,
10238+
sp->bg_extent_count);
1023410239
btrfs_put_block_group(sp->ptr);
10240+
}
1023510241
kfree(sp);
1023610242
}
1023710243
node = next;
@@ -10446,6 +10452,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
1044610452
goto out;
1044710453
}
1044810454

10455+
if (!btrfs_inc_block_group_swap_extents(bg)) {
10456+
btrfs_warn(fs_info,
10457+
"block group for swapfile at %llu is read-only%s",
10458+
bg->start,
10459+
atomic_read(&fs_info->scrubs_running) ?
10460+
" (scrub running)" : "");
10461+
btrfs_put_block_group(bg);
10462+
ret = -EINVAL;
10463+
goto out;
10464+
}
10465+
1044910466
ret = btrfs_add_swapfile_pin(inode, bg, true);
1045010467
if (ret) {
1045110468
btrfs_put_block_group(bg);

fs/btrfs/scrub.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
37673767
* commit_transactions.
37683768
*/
37693769
ro_set = 0;
3770+
} else if (ret == -ETXTBSY) {
3771+
btrfs_warn(fs_info,
3772+
"skipping scrub of block group %llu due to active swapfile",
3773+
cache->start);
3774+
scrub_pause_off(fs_info);
3775+
ret = 0;
3776+
goto skip_unfreeze;
37703777
} else {
37713778
btrfs_warn(fs_info,
37723779
"failed setting block group ro: %d", ret);
@@ -3862,7 +3869,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
38623869
} else {
38633870
spin_unlock(&cache->lock);
38643871
}
3865-
3872+
skip_unfreeze:
38663873
btrfs_unfreeze_block_group(cache);
38673874
btrfs_put_block_group(cache);
38683875
if (ret)

0 commit comments

Comments
 (0)