ioctl.c 137 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
Christoph Hellwig's avatar
Christoph Hellwig committed
2
3
4
5
6
7
8
9
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
10
#include <linux/fsnotify.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
11
12
13
14
15
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
16
17
#include <linux/mount.h>
#include <linux/namei.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
18
19
#include <linux/writeback.h>
#include <linux/compat.h>
20
#include <linux/security.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
21
#include <linux/xattr.h>
22
#include <linux/mm.h>
23
#include <linux/slab.h>
24
#include <linux/blkdev.h>
25
#include <linux/uuid.h>
26
#include <linux/btrfs.h>
Mark Fasheh's avatar
Mark Fasheh committed
27
#include <linux/uaccess.h>
28
#include <linux/iversion.h>
Christoph Hellwig's avatar
Christoph Hellwig committed
29
30
31
32
33
34
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
35
#include "locking.h"
36
#include "inode-map.h"
37
#include "backref.h"
38
#include "rcu-string.h"
39
#include "send.h"
40
#include "dev-replace.h"
41
#include "props.h"
42
#include "sysfs.h"
Josef Bacik's avatar
Josef Bacik committed
43
#include "qgroup.h"
44
#include "tree-log.h"
45
#include "compression.h"
46
#include "space-info.h"
47
#include "delalloc-space.h"
48
#include "block-group.h"
Christoph Hellwig's avatar
Christoph Hellwig committed
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
/*
 * Packed timespec: a 64-bit seconds field directly followed by a 32-bit
 * nanoseconds field, with no padding between or after them.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/*
 * Packed argument structure used by BTRFS_IOC_SET_RECEIVED_SUBVOL_32. It
 * embeds the packed timespec above for both timestamps. The in/out marker
 * on each field is the data direction as annotated by the original author.
 */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

/* Read/write ioctl, command number 37, taking the packed structure above. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif

75
76
77
78
79
80
81
82
83
84
85
86
87
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/*
 * Packed argument structure used by BTRFS_IOC_SEND_32. The clone_sources
 * member is a compat_uptr_t rather than a native pointer; every field is
 * annotated as input-only.
 * NOTE(review): presumably mirrors the native send args layout as seen by a
 * 32-bit caller - confirm against the UAPI header.
 */
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

/* Write-only ioctl, command number 38, taking the packed structure above. */
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif
88

Mark Fasheh's avatar
Mark Fasheh committed
89
static int btrfs_clone(struct inode *src, struct inode *inode,
90
91
		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
		       int no_time_update);
Mark Fasheh's avatar
Mark Fasheh committed
92

93
/* Mask out flags that are inappropriate for the given type of inode. */
94
95
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
96
{
97
	if (S_ISDIR(inode->i_mode))
98
		return flags;
99
	else if (S_ISREG(inode->i_mode))
100
101
102
103
104
105
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

/*
106
107
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
108
 */
109
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
Li Zefan's avatar
Li Zefan committed
125
126
127
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

128
	if (flags & BTRFS_INODE_NOCOMPRESS)
Li Zefan's avatar
Li Zefan committed
129
		iflags |= FS_NOCOMP_FL;
130
131
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;
132
133
134
135
136
137
138

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
139
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
140
{
141
	struct btrfs_inode *binode = BTRFS_I(inode);
142
	unsigned int new_fl = 0;
143

144
	if (binode->flags & BTRFS_INODE_SYNC)
145
		new_fl |= S_SYNC;
146
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
147
		new_fl |= S_IMMUTABLE;
148
	if (binode->flags & BTRFS_INODE_APPEND)
149
		new_fl |= S_APPEND;
150
	if (binode->flags & BTRFS_INODE_NOATIME)
151
		new_fl |= S_NOATIME;
152
	if (binode->flags & BTRFS_INODE_DIRSYNC)
153
154
155
156
157
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
158
159
160
161
}

/*
 * Flags-get ioctl handler: translate the inode's btrfs flags to the FS_*_FL
 * layout and copy the resulting word to the user buffer @arg.
 *
 * Returns 0 on success, -EFAULT if the copy to userspace fails.
 */
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	unsigned int fsflags;

	fsflags = btrfs_inode_flags_to_fsflags(BTRFS_I(file_inode(file))->flags);

	return copy_to_user(arg, &fsflags, sizeof(fsflags)) ? -EFAULT : 0;
}

170
171
/* Check if @flags are a supported and valid set of FS_*_FL flags */
static int check_fsflags(unsigned int flags)
172
173
174
175
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
		      FS_NOATIME_FL | FS_NODUMP_FL | \
		      FS_SYNC_FL | FS_DIRSYNC_FL | \
Li Zefan's avatar
Li Zefan committed
176
177
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
178
179
180
181
182
183
184
185
		return -EOPNOTSUPP;

	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

186
187
/*
 * Flags-set ioctl handler: read an FS_*_FL word from @arg and apply it to
 * the inode behind @file.
 *
 * Order of operations: ownership and read-only-root checks, copy in and
 * validate the flags, take mount write access and the inode lock, build the
 * new btrfs flag word in a local copy, then inside one transaction update
 * the "btrfs.compression" property and the inode item. The in-memory flags
 * are only written once the property update has succeeded.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	/* FS_*_FL bits that map directly onto a single BTRFS_INODE_* bit. */
	static const struct {
		unsigned int fs_bit;
		u32 btrfs_bit;
	} simple_flags[] = {
		{ FS_SYNC_FL,		BTRFS_INODE_SYNC },
		{ FS_IMMUTABLE_FL,	BTRFS_INODE_IMMUTABLE },
		{ FS_APPEND_FL,		BTRFS_INODE_APPEND },
		{ FS_NODUMP_FL,		BTRFS_INODE_NODUMP },
		{ FS_NOATIME_FL,	BTRFS_INODE_NOATIME },
		{ FS_DIRSYNC_FL,	BTRFS_INODE_DIRSYNC },
	};
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	/* Working copy of the flags, taken at entry and stored on success. */
	u32 new_flags = binode->flags;
	unsigned int requested, prev_fsflags;
	const char *comp_name = NULL;
	size_t i;
	int ret;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&requested, arg, sizeof(requested)))
		return -EFAULT;

	ret = check_fsflags(requested);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	requested = btrfs_mask_fsflags_for_type(inode, requested);
	prev_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
	ret = vfs_ioc_setflags_prepare(inode, prev_fsflags, requested);
	if (ret)
		goto out_unlock;

	/* Set or clear each directly mapped bit according to the request. */
	for (i = 0; i < ARRAY_SIZE(simple_flags); i++) {
		if (requested & simple_flags[i].fs_bit)
			new_flags |= simple_flags[i].btrfs_bit;
		else
			new_flags &= ~simple_flags[i].btrfs_bit;
	}

	/*
	 * NOCOW handling. For a regular file the NODATACOW/NODATASUM pair is
	 * only toggled while the file is empty: no extents exist, so it is
	 * safe to switch csums off or back on. For a non-empty file the bits
	 * are left untouched so they keep reflecting the real COW status.
	 * Other inode types simply track NODATACOW.
	 */
	if (S_ISREG(inode->i_mode)) {
		if (inode->i_size == 0) {
			const u32 nocow_bits = BTRFS_INODE_NODATACOW |
					       BTRFS_INODE_NODATASUM;

			if (requested & FS_NOCOW_FL)
				new_flags |= nocow_bits;
			else
				new_flags &= ~nocow_bits;
		}
	} else if (requested & FS_NOCOW_FL) {
		new_flags |= BTRFS_INODE_NODATACOW;
	} else {
		new_flags &= ~BTRFS_INODE_NODATACOW;
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (requested & FS_NOCOMP_FL) {
		new_flags &= ~BTRFS_INODE_COMPRESS;
		new_flags |= BTRFS_INODE_NOCOMPRESS;
	} else if (requested & FS_COMPR_FL) {
		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
			goto out_unlock;
		}

		new_flags |= BTRFS_INODE_COMPRESS;
		new_flags &= ~BTRFS_INODE_NOCOMPRESS;

		/* Use the fs-wide compression type, falling back to zlib. */
		comp_name = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp_name || comp_name[0] == 0)
			comp_name = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);
	} else {
		new_flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	/*
	 * 1 for inode item
	 * 2 for properties
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	/*
	 * With a compression name the property is set to it; without one the
	 * property is set with a NULL value, and -ENODATA from that call is
	 * tolerated.
	 */
	ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp_name,
			     comp_name ? strlen(comp_name) : 0, 0);
	if (ret && (comp_name || ret != -ENODATA)) {
		btrfs_abort_transaction(trans, ret);
		goto out_end_trans;
	}

	binode->flags = new_flags;
	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

out_end_trans:
	btrfs_end_transaction(trans);
out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}

338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
/*
 * Convert btrfs internal inode flags into the FS_XFLAG_* layout expected by
 * the FS_IOC_FSGETXATTR ioctl. Only APPEND, IMMUTABLE, NOATIME, NODUMP and
 * SYNC are translated; every other bit is silently dropped.
 */
static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
{
	unsigned int out = 0;

	out |= (flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0;
	out |= (flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0;
	out |= (flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0;
	out |= (flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0;
	out |= (flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0;

	return out;
}

/*
 * Validate a set of FS_XFLAG_* flags: returns -EOPNOTSUPP if any bit outside
 * the supported set is present, 0 otherwise.
 */
static int check_xflags(unsigned int flags)
{
	const unsigned int supported = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
				       FS_XFLAG_NOATIME | FS_XFLAG_NODUMP |
				       FS_XFLAG_SYNC;

	return (flags & ~supported) ? -EOPNOTSUPP : 0;
}

370
371
372
373
374
375
376
377
378
/*
 * Build a struct fsxattr from the inode's btrfs flags via
 * simple_fill_fsxattr() and copy it to the user buffer @arg. Per the
 * original note, the remaining fsxattr members are zeroed.
 *
 * Returns 0 on success, -EFAULT if the copy to userspace fails.
 */
static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
{
	struct fsxattr fa;
	unsigned int xflags;

	xflags = btrfs_inode_flags_to_xflags(BTRFS_I(file_inode(file))->flags);
	simple_fill_fsxattr(&fa, xflags);

	return copy_to_user(arg, &fa, sizeof(fa)) ? -EFAULT : 0;
}

386
387
388
389
390
391
/*
 * Apply the xflags of a user-supplied struct fsxattr to the inode behind
 * @file.
 *
 * Only the flags accepted by check_xflags() are supported, and the extsize,
 * projid and cowextsize members must all be zero (-EOPNOTSUPP otherwise).
 * The btrfs flags are modified in place before the transaction starts; if
 * anything fails afterwards, both the btrfs flags and inode->i_flags are
 * restored to the values saved under the inode lock.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
{
	/* FS_XFLAG_* bits and the BTRFS_INODE_* bit each one controls. */
	static const struct {
		unsigned int xflag;
		unsigned int btrfs_bit;
	} xflag_map[] = {
		{ FS_XFLAG_SYNC,	BTRFS_INODE_SYNC },
		{ FS_XFLAG_IMMUTABLE,	BTRFS_INODE_IMMUTABLE },
		{ FS_XFLAG_APPEND,	BTRFS_INODE_APPEND },
		{ FS_XFLAG_NODUMP,	BTRFS_INODE_NODUMP },
		{ FS_XFLAG_NOATIME,	BTRFS_INODE_NOATIME },
	};
	struct inode *inode = file_inode(file);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	struct fsxattr fa, old_fa;
	unsigned saved_flags;
	unsigned saved_i_flags;
	size_t i;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&fa, arg, sizeof(fa)))
		return -EFAULT;

	ret = check_xflags(fa.fsx_xflags);
	if (ret)
		return ret;

	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	/* Remember the current state so a failure can be rolled back. */
	saved_flags = binode->flags;
	saved_i_flags = inode->i_flags;

	simple_fill_fsxattr(&old_fa,
			    btrfs_inode_flags_to_xflags(binode->flags));
	ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
	if (ret)
		goto out_unlock;

	for (i = 0; i < ARRAY_SIZE(xflag_map); i++) {
		if (fa.fsx_xflags & xflag_map[i].xflag)
			binode->flags |= xflag_map[i].btrfs_bit;
		else
			binode->flags &= ~xflag_map[i].btrfs_bit;
	}

	/* 1 item for the inode */
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);

out_unlock:
	if (ret) {
		binode->flags = saved_flags;
		inode->i_flags = saved_i_flags;
	}

	inode_unlock(inode);
	mnt_drop_write_file(file);

	return ret;
}

475
476
/*
 * Store the i_generation of the inode behind @file into the user integer
 * @arg. Returns whatever put_user() returns.
 */
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	return put_user(file_inode(file)->i_generation, arg);
}
Christoph Hellwig's avatar
Christoph Hellwig committed
481

482
483
/*
 * Trim ioctl handler: read a struct fstrim_range from @arg, trim the
 * filesystem via btrfs_trim_fs(), and copy the range back to userspace.
 *
 * Returns:
 *   -EPERM       caller lacks CAP_SYS_ADMIN
 *   -EROFS       filesystem is mounted with nologreplay
 *   -EOPNOTSUPP  no device with a discard-capable queue was found
 *   -EFAULT      copying the range from or to userspace failed
 *   -EINVAL      range.len is smaller than the superblock block size
 *   <0           error returned by btrfs_trim_fs()
 *   0            success
 */
static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
					void __user *arg)
{
	struct fstrim_range range;
	struct btrfs_device *dev;
	u64 min_granularity = ULLONG_MAX;
	u64 discard_devs = 0;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * If the fs is mounted with nologreplay, which requires it to be
	 * mounted in RO mode as well, we can not allow discard on free space
	 * inside block groups, because log trees refer to extents that are not
	 * pinned in a block group's free space cache (pinning the extents is
	 * precisely the first phase of replaying a log tree).
	 */
	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
		return -EROFS;

	/* Count discard-capable devices and find the smallest granularity. */
	rcu_read_lock();
	list_for_each_entry_rcu(dev, &fs_info->fs_devices->devices,
				dev_list) {
		struct request_queue *queue;

		if (!dev->bdev)
			continue;

		queue = bdev_get_queue(dev->bdev);
		if (!blk_queue_discard(queue))
			continue;

		discard_devs++;
		min_granularity = min_t(u64, queue->limits.discard_granularity,
					min_granularity);
	}
	rcu_read_unlock();

	if (!discard_devs)
		return -EOPNOTSUPP;

	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;

	/*
	 * NOTE: Don't truncate the range using super->total_bytes.  Bytenr of
	 * block group is in the logical address space, which can be any
	 * sectorsize aligned bytenr in the range [0, U64_MAX].
	 */
	if (range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	/* Never trim below the smallest discard granularity found above. */
	range.minlen = max(range.minlen, min_granularity);

	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	return copy_to_user(arg, &range, sizeof(range)) ? -EFAULT : 0;
}

543
/*
 * Return 1 if all BTRFS_UUID_SIZE bytes of @uuid are zero, 0 as soon as a
 * non-zero byte is found.
 */
int __pure btrfs_is_empty_uuid(u8 *uuid)
{
	const u8 *cur = uuid;
	const u8 *const end = uuid + BTRFS_UUID_SIZE;

	while (cur < end) {
		if (*cur++)
			return 0;
	}

	return 1;
}

554
static noinline int create_subvol(struct inode *dir,
555
				  struct dentry *dentry,
556
				  const char *name, int namelen,
Arne Jansen's avatar
Arne Jansen committed
557
				  u64 *async_transid,
558
				  struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
559
{
560
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
Christoph Hellwig's avatar
Christoph Hellwig committed
561
562
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
563
	struct btrfs_root_item *root_item;
Christoph Hellwig's avatar
Christoph Hellwig committed
564
565
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
566
	struct btrfs_root *root = BTRFS_I(dir)->root;
567
	struct btrfs_root *new_root;
568
	struct btrfs_block_rsv block_rsv;
569
	struct timespec64 cur_time = current_time(dir);
570
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
571
572
573
574
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
575
	u64 index = 0;
576
	uuid_le new_uuid;
Christoph Hellwig's avatar
Christoph Hellwig committed
577

578
579
580
581
	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

582
	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
583
	if (ret)
584
		goto fail_free;
585

586
587
	/*
	 * Don't create subvolume whose level is not zero. Or qgroup will be
588
	 * screwed up since it assumes subvolume qgroup's level to be 0.
589
	 */
590
591
592
593
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}
594

595
	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
Josef Bacik's avatar
Josef Bacik committed
596
	/*
597
598
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
Josef Bacik's avatar
Josef Bacik committed
599
	 */
600
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
601
	if (ret)
602
		goto fail_free;
603
604
605
606

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
607
		btrfs_subvolume_release_metadata(fs_info, &block_rsv);
608
		goto fail_free;
609
610
611
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;
Christoph Hellwig's avatar
Christoph Hellwig committed
612

613
	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
Arne Jansen's avatar
Arne Jansen committed
614
615
616
	if (ret)
		goto fail;

617
	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
618
619
620
621
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
622
623
624

	btrfs_mark_buffer_dirty(leaf);

625
	inode_item = &root_item->inode;
626
627
628
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
629
	btrfs_set_stack_inode_nbytes(inode_item,
630
				     fs_info->nodesize);
631
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
Christoph Hellwig's avatar
Christoph Hellwig committed
632

633
634
	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
635
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
636

637
638
639
640
641
642
	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);
Christoph Hellwig's avatar
Christoph Hellwig committed
643

644
645
	btrfs_set_root_generation_v2(root_item,
			btrfs_root_generation(root_item));
646
	uuid_le_gen(&new_uuid);
647
648
649
650
651
652
	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);
Christoph Hellwig's avatar
Christoph Hellwig committed
653

654
	btrfs_tree_unlock(leaf);
Christoph Hellwig's avatar
Christoph Hellwig committed
655
656
657
	free_extent_buffer(leaf);
	leaf = NULL;

658
	btrfs_set_root_dirid(root_item, new_dirid);
Christoph Hellwig's avatar
Christoph Hellwig committed
659
660

	key.objectid = objectid;
661
	key.offset = 0;
662
	key.type = BTRFS_ROOT_ITEM_KEY;
663
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
664
				root_item);
Christoph Hellwig's avatar
Christoph Hellwig committed
665
666
667
	if (ret)
		goto fail;

668
	key.offset = (u64)-1;
669
	new_root = btrfs_read_fs_root_no_name(fs_info, &key);
670
671
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
672
		btrfs_abort_transaction(trans, ret);
673
674
		goto fail;
	}
675
676
677

	btrfs_record_root_in_trans(trans, new_root);

678
	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
679
680
	if (ret) {
		/* We potentially lose an unused inode item here */
681
		btrfs_abort_transaction(trans, ret);
682
683
684
		goto fail;
	}

685
686
687
688
	mutex_lock(&new_root->objectid_mutex);
	new_root->highest_objectid = new_dirid;
	mutex_unlock(&new_root->objectid_mutex);

Christoph Hellwig's avatar
Christoph Hellwig committed
689
690
691
	/*
	 * insert the directory item
	 */
692
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
693
	if (ret) {
694
		btrfs_abort_transaction(trans, ret);
695
696
		goto fail;
	}
697

698
	ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
699
				    BTRFS_FT_DIR, index);
700
	if (ret) {
701
		btrfs_abort_transaction(trans, ret);
Christoph Hellwig's avatar
Christoph Hellwig committed
702
		goto fail;
703
	}
704

705
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
706
	ret = btrfs_update_inode(trans, root, dir);
707
708
709
710
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}
711

712
	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
713
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
714
715
716
717
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
718

719
	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
720
				  BTRFS_UUID_KEY_SUBVOL, objectid);
721
	if (ret)
722
		btrfs_abort_transaction(trans, ret);
723

Christoph Hellwig's avatar
Christoph Hellwig committed
724
fail:
725
	kfree(root_item);
726
727
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
728
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
729

Sage Weil's avatar
Sage Weil committed
730
731
	if (async_transid) {
		*async_transid = trans->transid;
732
		err = btrfs_commit_transaction_async(trans, 1);
733
		if (err)
734
			err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
735
	} else {
736
		err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
737
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
738
739
	if (err && !ret)
		ret = err;
740

741
742
	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
743
744
		if (IS_ERR(inode))
			return PTR_ERR(inode);
745
746
		d_instantiate(dentry, inode);
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
747
	return ret;
748
749
750
751

fail_free:
	kfree(root_item);
	return ret;
Christoph Hellwig's avatar
Christoph Hellwig committed
752
753
}

754
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
755
			   struct dentry *dentry,
756
757
			   u64 *async_transid, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
758
{
759
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
760
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
761
762
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
763
	int ret;
764
	bool snapshot_force_cow = false;
Christoph Hellwig's avatar
Christoph Hellwig committed
765

766
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Christoph Hellwig's avatar
Christoph Hellwig committed
767
768
		return -EINVAL;

769
770
771
772
773
774
	if (atomic_read(&root->nr_swapfiles)) {
		btrfs_warn(fs_info,
			   "cannot snapshot subvolume with active swapfile");
		return -ETXTBSY;
	}

775
	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
776
777
778
	if (!pending_snapshot)
		return -ENOMEM;

779
	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
780
			GFP_KERNEL);
781
782
	pending_snapshot->path = btrfs_alloc_path();
	if (!pending_snapshot->root_item || !pending_snapshot->path) {
783
784
785
786
		ret = -ENOMEM;
		goto free_pending;
	}

787
788
789
790
791
	/*
	 * Force new buffered writes to reserve space even when NOCOW is
	 * possible. This is to avoid later writeback (running dealloc) to
	 * fallback to COW mode and unexpectedly fail with ENOSPC.
	 */
792
	atomic_inc(&root->will_be_snapshotted);
793
	smp_mb__after_atomic();
794
795
796
	/* wait for no snapshot writes */
	wait_event(root->subv_writers->wait,
		   percpu_counter_sum(&root->subv_writers->counter) == 0);
797

798
	ret = btrfs_start_delalloc_snapshot(root);
799
	if (ret)
800
		goto dec_and_free;
801

802
803
804
805
806
807
808
809
	/*
	 * All previous writes have started writeback in NOCOW mode, so now
	 * we force future writes to fallback to COW mode during snapshot
	 * creation.
	 */
	atomic_inc(&root->snapshot_force_cow);
	snapshot_force_cow = true;

810
	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
811

812
813
	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
814
815
816
817
818
819
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
820
	 * 1 - UUID item
821
822
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
823
					&pending_snapshot->block_rsv, 8,
824
					false);
825
	if (ret)
826
		goto dec_and_free;
827

828
	pending_snapshot->dentry = dentry;
Christoph Hellwig's avatar
Christoph Hellwig committed
829
	pending_snapshot->root = root;
830
	pending_snapshot->readonly = readonly;
831
	pending_snapshot->dir = dir;
832
	pending_snapshot->inherit = inherit;
833

834
	trans = btrfs_start_transaction(root, 0);
835
836
837
838
839
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

840
	spin_lock(&fs_info->trans_lock);
Christoph Hellwig's avatar
Christoph Hellwig committed
841
842
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
843
	spin_unlock(&fs_info->trans_lock);
Sage Weil's avatar
Sage Weil committed
844
845
	if (async_transid) {
		*async_transid = trans->transid;
846
		ret = btrfs_commit_transaction_async(trans, 1);
847
		if (ret)
848
			ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
849
	} else {
850
		ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
851
	}
852
	if (ret)
853
		goto fail;
854
855
856
857
858

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

859
860
861
862
	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

863
	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
864
865
866
867
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}
868

869
870
871
	d_instantiate(dentry, inode);
	ret = 0;
fail:
872
	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
873
dec_and_free:
874
875
	if (snapshot_force_cow)
		atomic_dec(&root->snapshot_force_cow);
876
	if (atomic_dec_and_test(&root->will_be_snapshotted))
877
		wake_up_var(&root->will_be_snapshotted);
878
879
free_pending:
	kfree(pending_snapshot->root_item);
880
	btrfs_free_path(pending_snapshot->path);
881
882
	kfree(pending_snapshot);

Christoph Hellwig's avatar
Christoph Hellwig committed
883
884
885
	return ret;
}

886
887
888
889
890
891
892
893
894
895
896
/*  copy of may_delete in fs/namei.c()
 *	Check whether we can remove a link victim from directory dir, check
 *  whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
897
 *  6. If the victim is append-only or immutable we can't do anything with
898
899
900
901
902
903
904
905
 *     links pointing to it.
 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 *  9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

906
static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
907
908
909
{
	int error;

910
	if (d_really_is_negative(victim))
911
912
		return -ENOENT;

913
	BUG_ON(d_inode(victim->d_parent) != dir);
914
	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
915
916
917
918
919
920

	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
921
922
	if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
	    IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
923
924
		return -EPERM;
	if (isdir) {
925
		if (!d_is_dir(victim))
926
927
928
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
929
	} else if (d_is_dir(victim))
930
931
932
933
934
935
936
937
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

938
939
940
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
941
	if (d_really_is_positive(child))
942
943
944
945
946
947
948
949
950
951
952
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent.  This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
Al Viro's avatar
Al Viro committed
953
static noinline int btrfs_mksubvol(const struct path *parent,
954
				   const char *name, int namelen,
Sage Weil's avatar
Sage Weil committed
955
				   struct btrfs_root *snap_src,
Arne Jansen's avatar
Arne Jansen committed
956
				   u64 *async_transid, bool readonly,
957
				   struct btrfs_qgroup_inherit *inherit)
958
{
959
960
	struct inode *dir = d_inode(parent->dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
961
962
963
	struct dentry *dentry;
	int error;

964
965
966
	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (error == -EINTR)
		return error;
967
968
969
970
971
972

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

973
	error = btrfs_may_create(dir, dentry);
974
	if (error)
975
		goto out_dput;
976

977
978
979
980
981
982
983
984
985
986
	/*
	 * even if this name doesn't exist, we may get hash collisions.
	 * check for them now when we can safely fail
	 */
	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
					       dir->i_ino, name,
					       namelen);
	if (error)
		goto out_dput;

987
	down_read(&fs_info->subvol_sem);
988
989
990
991

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

992
	if (snap_src) {