Commit 2187f215 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-5.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A mix of regression fixes and regular fixes for stable trees:

   - fix swapped error messages for qgroup enable/rescan

   - fixes for NO_HOLES feature with clone range

   - fix deadlock between iget/srcu lock/synchronize srcu while freeing
     an inode

   - fix double lock on subvolume cross-rename

   - tree log fixes
      * fix missing data checksums after replaying a log tree
      * also teach tree-checker about this problem
      * skip log replay on orphaned roots

   - fix maximum devices constraints for RAID1C -3 and -4

   - send: don't print warning on read-only mount regarding orphan
     cleanup

   - error handling fixes"

* tag 'for-5.5-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: send: remove WARN_ON for readonly mount
  btrfs: do not leak reloc root if we fail to read the fs root
  btrfs: skip log replay on orphaned roots
  btrfs: handle ENOENT in btrfs_uuid_tree_iterate
  btrfs: abort transaction after failed inode updates in create_subvol
  Btrfs: fix hole extent items with a zero size after range cloning
  Btrfs: fix removal logic of the tree mod log that leads to use-after-free issues
  Btrfs: make tree checker detect checksum items with overlapping ranges
  Btrfs: fix missing data checksums after replaying a log tree
  btrfs: return error pointer from alloc_test_extent_buffer
  btrfs: fix devs_max constraints for raid1c3 and raid1c4
  btrfs: tree-checker: Fix error format string for size_t
  btrfs: don't double lock the subvol_sem for rename exchange
  btrfs: handle error in btrfs_cache_block_group
  btrfs: do not call synchronize_srcu() in inode_tree_del
  Btrfs: fix cloning range with a hole when using the NO_HOLES feature
  btrfs: Fix error messages in qgroup_rescan_init
parents 2d3145f8 fbd54297
......@@ -379,7 +379,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = rb_entry(node, struct tree_mod_elem, node);
if (tm->seq > min_seq)
if (tm->seq >= min_seq)
continue;
rb_erase(node, tm_root);
kfree(tm);
......
......@@ -2787,7 +2787,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
/* file-item.c */
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u8 *dst);
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
......
......@@ -1869,8 +1869,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
if (head->is_data) {
ret = btrfs_del_csums(trans, fs_info, head->bytenr,
head->num_bytes);
ret = btrfs_del_csums(trans, fs_info->csum_root,
head->bytenr, head->num_bytes);
}
}
......@@ -3175,7 +3175,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (is_data) {
ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
ret = btrfs_del_csums(trans, info->csum_root, bytenr,
num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
......@@ -3799,6 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 flags, int delalloc)
{
int ret = 0;
int cache_block_group_error = 0;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
......@@ -3958,7 +3960,20 @@ have_block_group:
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
BUG_ON(ret < 0);
/*
* If we get ENOMEM here or something else we want to
* try other block groups, because it may not be fatal.
* However if we can't find anything else we need to
* save our return here so that we return the actual
* error that caused problems, not ENOSPC.
*/
if (ret < 0) {
if (!cache_block_group_error)
cache_block_group_error = ret;
ret = 0;
goto loop;
}
ret = 0;
}
......@@ -4045,7 +4060,7 @@ loop:
if (ret > 0)
goto search;
if (ret == -ENOSPC) {
if (ret == -ENOSPC && !cache_block_group_error) {
/*
* Use ffe_ctl->total_free_space as fallback if we can't find
* any contiguous hole.
......@@ -4056,6 +4071,8 @@ loop:
space_info->max_extent_size = ffe_ctl.max_extent_size;
spin_unlock(&space_info->lock);
ins->offset = ffe_ctl.max_extent_size;
} else if (ret == -ENOSPC) {
ret = cache_block_group_error;
}
return ret;
}
......
......@@ -5074,12 +5074,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb)
return NULL;
return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
again:
ret = radix_tree_preload(GFP_NOFS);
if (ret)
if (ret) {
exists = ERR_PTR(ret);
goto free_eb;
}
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
start >> PAGE_SHIFT, eb);
......
......@@ -590,9 +590,9 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
* range of bytes.
*/
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
struct btrfs_root *root, u64 bytenr, u64 len)
{
struct btrfs_root *root = fs_info->csum_root;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path;
struct btrfs_key key;
u64 end_byte = bytenr + len;
......@@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int blocksize_bits = fs_info->sb->s_blocksize_bits;
ASSERT(root == fs_info->csum_root ||
root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
......
......@@ -2599,8 +2599,8 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
}
}
if (clone_info) {
u64 clone_len = drop_end - cur_offset;
if (clone_info && drop_end > clone_info->file_offset) {
u64 clone_len = drop_end - clone_info->file_offset;
ret = btrfs_insert_clone_extent(trans, inode, path,
clone_info, clone_len);
......
......@@ -5728,7 +5728,6 @@ static void inode_tree_add(struct inode *inode)
static void inode_tree_del(struct inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int empty = 0;
......@@ -5741,7 +5740,6 @@ static void inode_tree_del(struct inode *inode)
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
synchronize_srcu(&fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
......@@ -9556,9 +9554,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
btrfs_init_log_ctx(&ctx_dest, new_inode);
/* close the race window with snapshot create/destroy ioctl */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
new_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
/*
......@@ -9792,9 +9789,8 @@ out_fail:
ret = ret ? ret : ret2;
}
out_notrans:
if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
ASSERT(list_empty(&ctx_root.list));
......
......@@ -704,11 +704,17 @@ static noinline int create_subvol(struct inode *dir,
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
BUG_ON(ret);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
BUG_ON(ret);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
......@@ -3720,24 +3726,18 @@ process_slot:
ret = 0;
if (last_dest_end < destoff + len) {
struct btrfs_clone_extent_info clone_info = { 0 };
/*
* We have an implicit hole (NO_HOLES feature is enabled) that
* fully or partially overlaps our cloning range at its end.
* We have an implicit hole that fully or partially overlaps our
* cloning range at its end. This means that we either have the
* NO_HOLES feature enabled or the implicit hole happened due to
* mixing buffered and direct IO writes against this file.
*/
btrfs_release_path(path);
path->leave_spinning = 0;
/*
* We are dealing with a hole and our clone_info already has a
* disk_offset of 0, we only need to fill the data length and
* file offset.
*/
clone_info.data_len = destoff + len - last_dest_end;
clone_info.file_offset = last_dest_end;
ret = btrfs_punch_hole_range(inode, path,
last_dest_end, destoff + len - 1,
&clone_info, &trans);
NULL, &trans);
if (ret)
goto out;
......
......@@ -3232,12 +3232,12 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
btrfs_warn(fs_info,
"qgroup rescan init failed, qgroup is not enabled");
"qgroup rescan init failed, qgroup rescan is not queued");
ret = -EINVAL;
} else if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_ON)) {
btrfs_warn(fs_info,
"qgroup rescan init failed, qgroup rescan is not queued");
"qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
}
......
......@@ -4552,6 +4552,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
list_add_tail(&reloc_root->root_list, &reloc_roots);
goto out_free;
}
......
......@@ -7083,12 +7083,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
send_root->send_in_progress++;
spin_unlock(&send_root->root_item_lock);
/*
* This is done when we lookup the root, it should already be complete
* by the time we get here.
*/
WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
/*
* Userspace tools do the checks and warn the user if it's
* not RO.
......
......@@ -452,9 +452,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->fs_info->tree_root = root;
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
if (IS_ERR(root->node)) {
test_std_err(TEST_ALLOC_EXTENT_BUFFER);
ret = -ENOMEM;
ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
......
......@@ -484,9 +484,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
if (!root->node) {
if (IS_ERR(root->node)) {
test_err("couldn't allocate dummy buffer");
ret = -ENOMEM;
ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
......
......@@ -227,7 +227,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
*/
if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
file_extent_err(leaf, slot,
"invalid item size, have %u expect [%lu, %u)",
"invalid item size, have %u expect [%zu, %u)",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
SZ_4K);
return -EUCLEAN;
......@@ -332,7 +332,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
}
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
int slot)
int slot, struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
u32 sectorsize = fs_info->sectorsize;
......@@ -356,6 +356,20 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
}
if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
u64 prev_csum_end;
u32 prev_item_size;
prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
prev_csum_end = (prev_item_size / csumsize) * sectorsize;
prev_csum_end += prev_key->offset;
if (prev_csum_end > key->offset) {
generic_err(leaf, slot - 1,
"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
prev_csum_end, key->offset);
return -EUCLEAN;
}
}
return 0;
}
......@@ -1355,7 +1369,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_item(leaf, key, slot, prev_key);
break;
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(leaf, key, slot);
ret = check_csum_item(leaf, key, slot, prev_key);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
......
......@@ -808,7 +808,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = btrfs_del_csums(trans, fs_info,
ret = btrfs_del_csums(trans,
fs_info->csum_root,
sums->bytenr,
sums->len);
if (!ret)
......@@ -3909,6 +3910,28 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
return 0;
}
static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
int ret;
/*
* Due to extent cloning, we might have logged a csum item that covers a
* subrange of a cloned extent, and later we can end up logging a csum
* item for a larger subrange of the same extent or the entire range.
* This would leave csum items in the log tree that cover the same range
* and break the searches for checksums in the log tree, resulting in
* some checksums missing in the fs/subvolume tree. So just delete (or
* trim and adjust) any existing csum items in the log for this range.
*/
ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
if (ret)
return ret;
return btrfs_csum_file_blocks(trans, log_root, sums);
}
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *dst_path,
......@@ -4054,7 +4077,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = btrfs_csum_file_blocks(trans, log, sums);
ret = log_csums(trans, log, sums);
list_del(&sums->list);
kfree(sums);
}
......@@ -4274,7 +4297,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
ret = btrfs_csum_file_blocks(trans, log_root, sums);
ret = log_csums(trans, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
......@@ -6294,9 +6317,28 @@ again:
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
/*
* We didn't find the subvol, likely because it was
* deleted. This is ok, simply skip this log and go to
* the next one.
*
* We need to exclude the root because we can't have
* other log replays overwriting this log as we'll read
* it back in a few more times. This will keep our
* block from being modified, and we'll just bail for
* each subsequent pass.
*/
if (ret == -ENOENT)
ret = btrfs_pin_extent_for_log_replay(fs_info,
log->node->start,
log->node->len);
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
kfree(log);
if (!ret)
goto next;
btrfs_handle_fs_error(fs_info, ret,
"Couldn't read target root for tree log recovery.");
goto error;
......@@ -6328,7 +6370,6 @@ again:
&root->highest_objectid);
}
key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
......@@ -6336,9 +6377,10 @@ again:
if (ret)
goto error;
next:
if (found_key.offset == 0)
break;
key.offset = found_key.offset - 1;
}
btrfs_release_path(path);
......
......@@ -324,6 +324,8 @@ again_search_slot:
}
if (ret < 0 && ret != -ENOENT)
goto out;
key.offset++;
goto again_search_slot;
}
item_size -= sizeof(subid_le);
offset += sizeof(subid_le);
......
......@@ -61,7 +61,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C3] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
.devs_max = 3,
.devs_min = 3,
.tolerated_failures = 2,
.devs_increment = 3,
......@@ -73,7 +73,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C4] = {
.sub_stripes = 1,
.dev_stripes = 1,
.devs_max = 0,
.devs_max = 4,
.devs_min = 4,
.tolerated_failures = 3,
.devs_increment = 4,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment