// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/bit_spinlock.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "inode-map.h"
#include "backref.h"
#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
#include "tree-log.h"
#include "compression.h"

52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#ifdef CONFIG_64BIT
/*
 * If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */

/* Packed timespec: 8 bytes of seconds followed by 4 bytes of nanoseconds. */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

/*
 * Packed argument layout used with BTRFS_IOC_SET_RECEIVED_SUBVOL_32; it
 * embeds the packed timespec above for both time stamps.
 */
struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

/* Read/write ioctl number 37 encoded with the size of the packed layout. */
#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif /* CONFIG_64BIT */

77
78
79
80
81
82
83
84
85
86
87
88
89
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
/*
 * Packed argument layout used with BTRFS_IOC_SEND_32. The clone_sources
 * pointer is carried as a compat_uptr_t instead of a native pointer.
 */
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

/* Write-direction ioctl number 38 encoded with the size of the packed layout. */
#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif /* CONFIG_64BIT && CONFIG_COMPAT */
90

Mark Fasheh's avatar
Mark Fasheh committed
91
/*
 * Forward declaration of the file-local btrfs_clone() so that it can be
 * referenced before its definition.
 */
static int btrfs_clone(struct inode *src, struct inode *inode,
		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
		       int no_time_update);
Mark Fasheh's avatar
Mark Fasheh committed
94

95
/* Mask out flags that are inappropriate for the given type of inode. */
96
97
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
98
{
99
	if (S_ISDIR(inode->i_mode))
100
		return flags;
101
	else if (S_ISREG(inode->i_mode))
102
103
104
105
106
107
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}

/*
108
109
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
110
 */
111
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
Li Zefan's avatar
Li Zefan committed
127
128
129
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

130
	if (flags & BTRFS_INODE_NOCOMPRESS)
Li Zefan's avatar
Li Zefan committed
131
		iflags |= FS_NOCOMP_FL;
132
133
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;
134
135
136
137
138
139
140

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
141
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
142
{
143
	struct btrfs_inode *binode = BTRFS_I(inode);
144
	unsigned int new_fl = 0;
145

146
	if (binode->flags & BTRFS_INODE_SYNC)
147
		new_fl |= S_SYNC;
148
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
149
		new_fl |= S_IMMUTABLE;
150
	if (binode->flags & BTRFS_INODE_APPEND)
151
		new_fl |= S_APPEND;
152
	if (binode->flags & BTRFS_INODE_NOATIME)
153
		new_fl |= S_NOATIME;
154
	if (binode->flags & BTRFS_INODE_DIRSYNC)
155
156
157
158
159
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
160
161
162
163
}

/*
 * Copy the FS_*_FL view of the inode behind @file to the user buffer @arg.
 *
 * Returns 0 on success or -EFAULT if the copy to user space fails.
 */
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);

	return copy_to_user(arg, &flags, sizeof(flags)) ? -EFAULT : 0;
}

172
173
/*
 * Check if @flags are a supported and valid set of FS_*_FL flags.
 *
 * Returns -EOPNOTSUPP if any bit outside the supported set is present,
 * -EINVAL if FS_NOCOMP_FL and FS_COMPR_FL are requested together, and 0
 * otherwise.
 */
static int check_fsflags(unsigned int flags)
{
	const unsigned int supported = FS_IMMUTABLE_FL | FS_APPEND_FL |
				       FS_NOATIME_FL | FS_NODUMP_FL |
				       FS_SYNC_FL | FS_DIRSYNC_FL |
				       FS_NOCOMP_FL | FS_COMPR_FL |
				       FS_NOCOW_FL;

	if (flags & ~supported)
		return -EOPNOTSUPP;

	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

188
189
/*
 * Apply the FS_*_FL mask read from the user buffer @arg to the inode behind
 * @file.
 *
 * The caller must pass inode_owner_or_capable(), the root must not be
 * read-only and the mask must pass check_fsflags(). Changing FS_APPEND_FL or
 * FS_IMMUTABLE_FL additionally requires CAP_LINUX_IMMUTABLE. The new flags
 * are written to the inode item inside a transaction; if anything fails after
 * the in-memory flags were modified, both binode->flags and inode->i_flags
 * are restored to their previous values.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	unsigned int fsflags, old_fsflags;
	int ret;
	u64 old_flags;
	unsigned int old_i_flags;
	umode_t mode;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
		return -EFAULT;

	ret = check_fsflags(fsflags);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	/* Snapshot the current state so it can be restored on failure. */
	old_flags = binode->flags;
	old_i_flags = inode->i_flags;
	mode = inode->i_mode;

	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
	/* Toggling append-only/immutable needs CAP_LINUX_IMMUTABLE. */
	if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	if (fsflags & FS_SYNC_FL)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;
	if (fsflags & FS_IMMUTABLE_FL)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fsflags & FS_APPEND_FL)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;
	if (fsflags & FS_NODUMP_FL)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;
	if (fsflags & FS_NOATIME_FL)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;
	if (fsflags & FS_DIRSYNC_FL)
		binode->flags |= BTRFS_INODE_DIRSYNC;
	else
		binode->flags &= ~BTRFS_INODE_DIRSYNC;
	if (fsflags & FS_NOCOW_FL) {
		if (S_ISREG(mode)) {
			/*
			 * It's safe to turn csums off here, no extents exist.
			 * Otherwise we want the flag to reflect the real COW
			 * status of the file and will not set it.
			 */
			if (inode->i_size == 0)
				binode->flags |= BTRFS_INODE_NODATACOW
					      | BTRFS_INODE_NODATASUM;
		} else {
			binode->flags |= BTRFS_INODE_NODATACOW;
		}
	} else {
		/*
		 * Revert back under same assumptions as above
		 */
		if (S_ISREG(mode)) {
			if (inode->i_size == 0)
				binode->flags &= ~(BTRFS_INODE_NODATACOW
					     | BTRFS_INODE_NODATASUM);
		} else {
			binode->flags &= ~BTRFS_INODE_NODATACOW;
		}
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (fsflags & FS_NOCOMP_FL) {
		binode->flags &= ~BTRFS_INODE_COMPRESS;
		binode->flags |= BTRFS_INODE_NOCOMPRESS;

		/* Drop the compression property; -ENODATA is tolerated. */
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
	} else if (fsflags & FS_COMPR_FL) {
		const char *comp;

		binode->flags |= BTRFS_INODE_COMPRESS;
		binode->flags &= ~BTRFS_INODE_NOCOMPRESS;

		/* Use the fs-wide compression type, falling back to zlib. */
		comp = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp || comp[0] == 0)
			comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);

		ret = btrfs_set_prop(inode, "btrfs.compression",
				     comp, strlen(comp), 0);
		if (ret)
			goto out_drop;

	} else {
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
		binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);
out_drop:
	/* Roll the in-memory flags back if anything above failed. */
	if (ret) {
		binode->flags = old_flags;
		inode->i_flags = old_i_flags;
	}

out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}

341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
/*
 * Convert btrfs internal inode flags into the FS_XFLAG_* representation
 * reported by the FS_IOC_FSGETXATT ioctl. Only the flags listed below are
 * translated; anything else is silently dropped.
 */
static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
{
	unsigned int out = 0;

	out |= (flags & BTRFS_INODE_APPEND) ? FS_XFLAG_APPEND : 0;
	out |= (flags & BTRFS_INODE_IMMUTABLE) ? FS_XFLAG_IMMUTABLE : 0;
	out |= (flags & BTRFS_INODE_NOATIME) ? FS_XFLAG_NOATIME : 0;
	out |= (flags & BTRFS_INODE_NODUMP) ? FS_XFLAG_NODUMP : 0;
	out |= (flags & BTRFS_INODE_SYNC) ? FS_XFLAG_SYNC : 0;

	return out;
}

/*
 * Validate a set of FS_XFLAG_* flags: returns 0 when every bit in @flags is
 * one of the supported ones, -EOPNOTSUPP otherwise.
 */
static int check_xflags(unsigned int flags)
{
	const unsigned int supported = FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE |
				       FS_XFLAG_NOATIME | FS_XFLAG_NODUMP |
				       FS_XFLAG_SYNC;

	return (flags & ~supported) ? -EOPNOTSUPP : 0;
}

373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
/*
 * Report the inode's xflags to user space. Only fsx_xflags is filled in from
 * the internal inode flags; every other member of struct fsxattr is zero.
 *
 * Returns 0 on success or -EFAULT if the copy to @arg fails.
 */
static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct fsxattr fsx;

	/* Zero the whole structure before it is handed to user space. */
	memset(&fsx, 0, sizeof(fsx));
	fsx.fsx_xflags = btrfs_inode_flags_to_xflags(BTRFS_I(inode)->flags);

	return copy_to_user(arg, &fsx, sizeof(fsx)) ? -EFAULT : 0;
}

391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
/*
 * Set the inode flags of the inode behind @file from the fsx_xflags member of
 * the struct fsxattr at @arg.
 *
 * Only the flags accepted by check_xflags() are supported, and fsx_extsize,
 * fsx_projid and fsx_cowextsize must all be zero. CAP_LINUX_IMMUTABLE is
 * required whenever the inode currently is, or is requested to become,
 * append-only or immutable. On failure after the in-memory flags were
 * changed, binode->flags and inode->i_flags are restored.
 *
 * Returns 0 on success or a negative errno.
 */
static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	struct fsxattr fsx;
	unsigned saved_flags;
	unsigned saved_i_flags;
	bool needs_cap;
	int err = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	memset(&fsx, 0, sizeof(fsx));
	if (copy_from_user(&fsx, arg, sizeof(fsx)))
		return -EFAULT;

	err = check_xflags(fsx.fsx_xflags);
	if (err)
		return err;

	/* None of the extended attributes besides the flags are supported. */
	if (fsx.fsx_extsize || fsx.fsx_projid || fsx.fsx_cowextsize)
		return -EOPNOTSUPP;

	err = mnt_want_write_file(file);
	if (err)
		return err;

	inode_lock(inode);

	saved_flags = binode->flags;
	saved_i_flags = inode->i_flags;

	/* We need the capabilities to change append-only or immutable inode */
	needs_cap = (saved_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
		    (fsx.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE));
	if (needs_cap && !capable(CAP_LINUX_IMMUTABLE)) {
		err = -EPERM;
		goto out_unlock;
	}

	if (fsx.fsx_xflags & FS_XFLAG_SYNC)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;

	if (fsx.fsx_xflags & FS_XFLAG_IMMUTABLE)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;

	if (fsx.fsx_xflags & FS_XFLAG_APPEND)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;

	if (fsx.fsx_xflags & FS_XFLAG_NODUMP)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;

	if (fsx.fsx_xflags & FS_XFLAG_NOATIME)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;

	/* 1 item for the inode */
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_unlock;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	err = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);

out_unlock:
	/* Undo the in-memory changes when the update did not go through. */
	if (err) {
		binode->flags = saved_flags;
		inode->i_flags = saved_i_flags;
	}

	inode_unlock(inode);
	mnt_drop_write_file(file);

	return err;
}

483
484
/*
 * Store the i_generation of the inode behind @file into the user integer
 * @arg. Returns whatever put_user() returns.
 */
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file_inode(file);

	return put_user(inode->i_generation, arg);
}
Christoph Hellwig's avatar
Christoph Hellwig committed
489

490
491
/*
 * Trim handler: read a struct fstrim_range from @arg, trim the filesystem
 * via btrfs_trim_fs() and copy the (updated) range back to @arg.
 *
 * Requires CAP_SYS_ADMIN. Returns -EOPNOTSUPP when no device with a block
 * device has a discard-capable queue, -EFAULT on a failed user copy,
 * -EINVAL when range.start lies beyond the super block's total_bytes or
 * range.len is smaller than the filesystem block size, a negative value
 * from btrfs_trim_fs(), or 0 on success.
 */
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * Count the discard-capable devices and remember the smallest
	 * discard granularity among them.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				dev_list) {
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min_t(u64, q->limits.discard_granularity,
				     minlen);
		}
	}
	rcu_read_unlock();

	if (!num_devices)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;
	if (range.start > total_bytes ||
	    range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	/* Clamp the length to total_bytes and raise minlen if needed. */
	range.len = min(range.len, total_bytes - range.start);
	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}

539
540
/*
 * Return 1 if the first BTRFS_UUID_SIZE bytes at @uuid are all zero,
 * 0 as soon as a non-zero byte is found.
 */
int btrfs_is_empty_uuid(u8 *uuid)
{
	int i;

	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
		if (uuid[i])
			return 0;
	}
	return 1;
}

550
static noinline int create_subvol(struct inode *dir,
551
				  struct dentry *dentry,
552
				  const char *name, int namelen,
Arne Jansen's avatar
Arne Jansen committed
553
				  u64 *async_transid,
554
				  struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
555
{
556
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
Christoph Hellwig's avatar
Christoph Hellwig committed
557
558
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
559
	struct btrfs_root_item *root_item;
Christoph Hellwig's avatar
Christoph Hellwig committed
560
561
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
562
	struct btrfs_root *root = BTRFS_I(dir)->root;
563
	struct btrfs_root *new_root;
564
	struct btrfs_block_rsv block_rsv;
565
	struct timespec64 cur_time = current_time(dir);
566
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
567
568
569
570
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
571
	u64 index = 0;
572
	uuid_le new_uuid;
Christoph Hellwig's avatar
Christoph Hellwig committed
573

574
575
576
577
	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

578
	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
579
	if (ret)
580
		goto fail_free;
581

582
583
	/*
	 * Don't create subvolume whose level is not zero. Or qgroup will be
584
	 * screwed up since it assumes subvolume qgroup's level to be 0.
585
	 */
586
587
588
589
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}
590

591
	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
Josef Bacik's avatar
Josef Bacik committed
592
	/*
593
594
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
Josef Bacik's avatar
Josef Bacik committed
595
	 */
596
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
597
	if (ret)
598
		goto fail_free;
599
600
601
602

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
603
		btrfs_subvolume_release_metadata(fs_info, &block_rsv);
604
		goto fail_free;
605
606
607
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;
Christoph Hellwig's avatar
Christoph Hellwig committed
608

609
	ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit);
Arne Jansen's avatar
Arne Jansen committed
610
611
612
	if (ret)
		goto fail;

613
	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
614
615
616
617
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
618

619
	memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header));
Christoph Hellwig's avatar
Christoph Hellwig committed
620
621
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
622
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
Christoph Hellwig's avatar
Christoph Hellwig committed
623
624
	btrfs_set_header_owner(leaf, objectid);

625
626
	write_extent_buffer_fsid(leaf, fs_info->fsid);
	write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid);
Christoph Hellwig's avatar
Christoph Hellwig committed
627
628
	btrfs_mark_buffer_dirty(leaf);

629
	inode_item = &root_item->inode;
630
631
632
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
633
	btrfs_set_stack_inode_nbytes(inode_item,
634
				     fs_info->nodesize);
635
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
Christoph Hellwig's avatar
Christoph Hellwig committed
636

637
638
	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
639
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
640

641
642
643
644
645
646
	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);
Christoph Hellwig's avatar
Christoph Hellwig committed
647

648
649
	btrfs_set_root_generation_v2(root_item,
			btrfs_root_generation(root_item));
650
	uuid_le_gen(&new_uuid);
651
652
653
654
655
656
	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);
Christoph Hellwig's avatar
Christoph Hellwig committed
657

658
	btrfs_tree_unlock(leaf);
Christoph Hellwig's avatar
Christoph Hellwig committed
659
660
661
	free_extent_buffer(leaf);
	leaf = NULL;

662
	btrfs_set_root_dirid(root_item, new_dirid);
Christoph Hellwig's avatar
Christoph Hellwig committed
663
664

	key.objectid = objectid;
665
	key.offset = 0;
666
	key.type = BTRFS_ROOT_ITEM_KEY;
667
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
668
				root_item);
Christoph Hellwig's avatar
Christoph Hellwig committed
669
670
671
	if (ret)
		goto fail;

672
	key.offset = (u64)-1;
673
	new_root = btrfs_read_fs_root_no_name(fs_info, &key);
674
675
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
676
		btrfs_abort_transaction(trans, ret);
677
678
		goto fail;
	}
679
680
681

	btrfs_record_root_in_trans(trans, new_root);

682
	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
683
684
	if (ret) {
		/* We potentially lose an unused inode item here */
685
		btrfs_abort_transaction(trans, ret);
686
687
688
		goto fail;
	}

689
690
691
692
	mutex_lock(&new_root->objectid_mutex);
	new_root->highest_objectid = new_dirid;
	mutex_unlock(&new_root->objectid_mutex);

Christoph Hellwig's avatar
Christoph Hellwig committed
693
694
695
	/*
	 * insert the directory item
	 */
696
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
697
	if (ret) {
698
		btrfs_abort_transaction(trans, ret);
699
700
		goto fail;
	}
701
702

	ret = btrfs_insert_dir_item(trans, root,
703
				    name, namelen, BTRFS_I(dir), &key,
704
				    BTRFS_FT_DIR, index);
705
	if (ret) {
706
		btrfs_abort_transaction(trans, ret);
Christoph Hellwig's avatar
Christoph Hellwig committed
707
		goto fail;
708
	}
709

710
	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
711
712
713
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);

714
	ret = btrfs_add_root_ref(trans, fs_info,
715
				 objectid, root->root_key.objectid,
716
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
717
	BUG_ON(ret);
Christoph Hellwig's avatar
Christoph Hellwig committed
718

719
	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
720
				  BTRFS_UUID_KEY_SUBVOL, objectid);
721
	if (ret)
722
		btrfs_abort_transaction(trans, ret);
723

Christoph Hellwig's avatar
Christoph Hellwig committed
724
fail:
725
	kfree(root_item);
726
727
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
728
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);
729

Sage Weil's avatar
Sage Weil committed
730
731
	if (async_transid) {
		*async_transid = trans->transid;
732
		err = btrfs_commit_transaction_async(trans, 1);
733
		if (err)
734
			err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
735
	} else {
736
		err = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
737
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
738
739
	if (err && !ret)
		ret = err;
740

741
742
	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
743
744
		if (IS_ERR(inode))
			return PTR_ERR(inode);
745
746
		d_instantiate(dentry, inode);
	}
Christoph Hellwig's avatar
Christoph Hellwig committed
747
	return ret;
748
749
750
751

fail_free:
	kfree(root_item);
	return ret;
Christoph Hellwig's avatar
Christoph Hellwig committed
752
753
}

754
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
755
			   struct dentry *dentry,
756
757
			   u64 *async_transid, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
Christoph Hellwig's avatar
Christoph Hellwig committed
758
{
759
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
760
	struct inode *inode;
Christoph Hellwig's avatar
Christoph Hellwig committed
761
762
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
763
	int ret;
Christoph Hellwig's avatar
Christoph Hellwig committed
764

765
	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
Christoph Hellwig's avatar
Christoph Hellwig committed
766
767
		return -EINVAL;

768
	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
769
770
771
	if (!pending_snapshot)
		return -ENOMEM;

772
	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
773
			GFP_KERNEL);
774
775
	pending_snapshot->path = btrfs_alloc_path();
	if (!pending_snapshot->root_item || !pending_snapshot->path) {
776
777
778
779
		ret = -ENOMEM;
		goto free_pending;
	}

780
	atomic_inc(&root->will_be_snapshotted);
781
	smp_mb__after_atomic();
782
783
784
	/* wait for no snapshot writes */
	wait_event(root->subv_writers->wait,
		   percpu_counter_sum(&root->subv_writers->counter) == 0);
785

786
	ret = btrfs_start_delalloc_inodes(root);
787
	if (ret)
788
		goto dec_and_free;
789

790
	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
791

792
793
	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
794
795
796
797
798
799
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
800
	 * 1 - UUID item
801
802
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
803
					&pending_snapshot->block_rsv, 8,
804
					false);
805
	if (ret)
806
		goto dec_and_free;
807

808
	pending_snapshot->dentry = dentry;
Christoph Hellwig's avatar
Christoph Hellwig committed
809
	pending_snapshot->root = root;
810
	pending_snapshot->readonly = readonly;
811
	pending_snapshot->dir = dir;
812
	pending_snapshot->inherit = inherit;
813

814
	trans = btrfs_start_transaction(root, 0);
815
816
817
818
819
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

820
	spin_lock(&fs_info->trans_lock);
Christoph Hellwig's avatar
Christoph Hellwig committed
821
822
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
823
	spin_unlock(&fs_info->trans_lock);
Sage Weil's avatar
Sage Weil committed
824
825
	if (async_transid) {
		*async_transid = trans->transid;
826
		ret = btrfs_commit_transaction_async(trans, 1);
827
		if (ret)
828
			ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
829
	} else {
830
		ret = btrfs_commit_transaction(trans);
Sage Weil's avatar
Sage Weil committed
831
	}
832
	if (ret)
833
		goto fail;
834
835
836
837
838

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

839
840
841
842
	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

843
	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
844
845
846
847
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}
848

849
850
851
	d_instantiate(dentry, inode);
	ret = 0;
fail:
852
	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
853
dec_and_free:
854
	if (atomic_dec_and_test(&root->will_be_snapshotted))
855
		wake_up_var(&root->will_be_snapshotted);
856
857
free_pending:
	kfree(pending_snapshot->root_item);
858
	btrfs_free_path(pending_snapshot->path);
859
860
	kfree(pending_snapshot);

Christoph Hellwig's avatar
Christoph Hellwig committed
861
862
863
	return ret;
}

864
865
866
867
868
869
870
871
872
873
874
/*  copy of may_delete in fs/namei.c()
 *	Check whether we can remove a link victim from directory dir, check
 *  whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
875
 *  6. If the victim is append-only or immutable we can't do anything with
876
877
878
879
880
881
882
883
 *     links pointing to it.
 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 *  9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

884
static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
885
886
887
{
	int error;

888
	if (d_really_is_negative(victim))
889
890
		return -ENOENT;

891
	BUG_ON(d_inode(victim->d_parent) != dir);
892
	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
893
894
895
896
897
898

	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
899
900
	if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
	    IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
901
902
		return -EPERM;
	if (isdir) {
903
		if (!d_is_dir(victim))
904
905
906
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
907
	} else if (d_is_dir(victim))
908
909
910
911
912
913
914
915
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

916
917
918
/* copy of may_create in fs/namei.c() */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
919
	if (d_really_is_positive(child))
920
921
922
923
924
925
926
927
928
929
930
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent.  This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
Al Viro's avatar
Al Viro committed
931
static noinline int btrfs_mksubvol(const struct path *parent,
932
				   const char *name, int namelen,
Sage Weil's avatar
Sage Weil committed
933
				   struct btrfs_root *snap_src,
Arne Jansen's avatar
Arne Jansen committed
934
				   u64 *async_transid, bool readonly,
935
				   struct btrfs_qgroup_inherit *inherit)
936
{
937
938
	struct inode *dir = d_inode(parent->dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
939
940
941
	struct dentry *dentry;
	int error;

942
943
944
	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (error == -EINTR)
		return error;
945
946
947
948
949
950

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

951
	error = btrfs_may_create(dir, dentry);
952
	if (error)
953
		goto out_dput;
954

955
956
957
958
959
960
961
962
963
964
	/*
	 * even if this name doesn't exist, we may get hash collisions.
	 * check for them now when we can safely fail
	 */
	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
					       dir->i_ino, name,
					       namelen);
	if (error)
		goto out_dput;

965
	down_read(&fs_info->subvol_sem);
966
967
968
969

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

970
	if (snap_src) {
971
		error = create_snapshot(snap_src, dir, dentry,
Arne Jansen's avatar
Arne Jansen committed
972
					async_transid, readonly, inherit);
973
	} else {
974
975
		error = create_subvol(dir, dentry, name, namelen,
				      async_transid, inherit);
976
	}
977
978
979
	if (!error)
		fsnotify_mkdir(dir, dentry);
out_up_read:
980
	up_read(&fs_info->subvol_sem);