tree-checker.c 47.6 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/*
 * Copyright (C) Qu Wenruo 2017.  All rights reserved.
 */

/*
 * The module is used to catch unexpected/corrupted tree block data.
 * Such behavior can be caused either by a fuzzed image or bugs.
 *
 * The objective is to do leaf/node validation checks when tree block is read
 * from disk, and check *every* possible member, so other code won't
 * need to checking them again.
 *
 * Due to the potential and unwanted damage, every checker needs to be
 * carefully reviewed otherwise so it does not prevent mount of valid images.
 */

18
19
20
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/error-injection.h>
21
22
23
24
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
25
#include "volumes.h"
26
#include "misc.h"
27

28
29
30
31
32
33
/*
 * Error message should follow the following format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
34
 * 		It's recommended to decode key.objecitd/offset if it's
35
36
 * 		meaningful.
 * @reason:	describe the error
37
 * @bad_value:	optional, it's recommended to output bad value and its
38
39
40
41
42
43
44
45
46
47
 *		expected value (range).
 *
 * Since comma is used to separate the components, only space is allowed
 * inside each component.
 */

/*
 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
 * Allows callers to customize the output.
 */
48
__printf(3, 4)
49
__cold
50
static void generic_err(const struct extent_buffer *eb, int slot,
51
52
			const char *fmt, ...)
{
53
	const struct btrfs_fs_info *fs_info = eb->fs_info;
54
55
56
57
58
59
60
61
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

62
	btrfs_crit(fs_info,
63
64
		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
65
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
66
67
68
	va_end(args);
}

69
70
71
72
/*
 * Customized reporter for extent data item, since its key objectid and
 * offset has its own meaning.
 */
73
__printf(3, 4)
74
__cold
75
static void file_extent_err(const struct extent_buffer *eb, int slot,
76
77
			    const char *fmt, ...)
{
78
	const struct btrfs_fs_info *fs_info = eb->fs_info;
79
80
81
82
83
84
85
86
87
88
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

89
	btrfs_crit(fs_info,
90
	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
91
92
93
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
94
95
96
97
98
99
100
	va_end(args);
}

/*
 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
 * Else return 1
 */
101
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
102
103
({									      \
	if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \
104
		file_extent_err((leaf), (slot),				      \
105
106
107
108
109
110
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
})

111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
static u64 file_extent_end(struct extent_buffer *leaf,
			   struct btrfs_key *key,
			   struct btrfs_file_extent_item *extent)
{
	u64 end;
	u64 len;

	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
		len = btrfs_file_extent_ram_bytes(leaf, extent);
		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
	} else {
		len = btrfs_file_extent_num_bytes(leaf, extent);
		end = key->offset + len;
	}
	return end;
}

128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/*
 * Customized report for dir_item, the only new important information is
 * key->objectid, which represents inode number
 */
__printf(3, 4)
__cold
static void dir_item_err(const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
	va_end(args);
}

/*
 * This functions checks prev_key->objectid, to ensure current key and prev_key
 * share the same objectid as inode number.
 *
 * This is to detect missing INODE_ITEM in subvolume trees.
 *
 * Return true if everything is OK or we don't need to check.
 * Return false if anything is wrong.
 */
static bool check_prev_ino(struct extent_buffer *leaf,
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
{
	/* No prev key, skip check */
	if (slot == 0)
		return true;

	/* Only these key->types needs to be checked */
	ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
	       key->type == BTRFS_INODE_REF_KEY ||
	       key->type == BTRFS_DIR_INDEX_KEY ||
	       key->type == BTRFS_DIR_ITEM_KEY ||
	       key->type == BTRFS_EXTENT_DATA_KEY);

	/*
	 * Only subvolume trees along with their reloc trees need this check.
	 * Things like log tree doesn't follow this ino requirement.
	 */
	if (!is_fstree(btrfs_header_owner(leaf)))
		return true;

	if (key->objectid == prev_key->objectid)
		return true;

	/* Error found */
	dir_item_err(leaf, slot,
		"invalid previous key objectid, have %llu expect %llu",
		prev_key->objectid, key->objectid);
	return false;
}
196
static int check_extent_data_item(struct extent_buffer *leaf,
197
198
				  struct btrfs_key *key, int slot,
				  struct btrfs_key *prev_key)
199
{
200
	struct btrfs_fs_info *fs_info = leaf->fs_info;
201
	struct btrfs_file_extent_item *fi;
202
	u32 sectorsize = fs_info->sectorsize;
203
	u32 item_size = btrfs_item_size_nr(leaf, slot);
204
	u64 extent_end;
205
206

	if (!IS_ALIGNED(key->offset, sectorsize)) {
207
		file_extent_err(leaf, slot,
208
209
"unaligned file_offset for file extent, have %llu should be aligned to %u",
			key->offset, sectorsize);
210
211
212
		return -EUCLEAN;
	}

213
214
215
216
217
218
	/*
	 * Previous key must have the same key->objectid (ino).
	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
	 * But if objectids mismatch, it means we have a missing
	 * INODE_ITEM.
	 */
219
	if (!check_prev_ino(leaf, key, slot, prev_key))
220
221
		return -EUCLEAN;

222
223
	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

224
225
226
227
228
229
	/*
	 * Make sure the item contains at least inline header, so the file
	 * extent type is not some garbage.
	 */
	if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
		file_extent_err(leaf, slot,
230
				"invalid item size, have %u expect [%zu, %u)",
231
232
233
234
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
				SZ_4K);
		return -EUCLEAN;
	}
235
	if (btrfs_file_extent_type(leaf, fi) >= BTRFS_NR_FILE_EXTENT_TYPES) {
236
		file_extent_err(leaf, slot,
237
238
		"invalid type for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_type(leaf, fi),
239
			BTRFS_NR_FILE_EXTENT_TYPES - 1);
240
241
242
243
		return -EUCLEAN;
	}

	/*
244
	 * Support for new compression/encryption must introduce incompat flag,
245
246
	 * and must be caught in open_ctree().
	 */
247
	if (btrfs_file_extent_compression(leaf, fi) >= BTRFS_NR_COMPRESS_TYPES) {
248
		file_extent_err(leaf, slot,
249
250
	"invalid compression for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_compression(leaf, fi),
251
			BTRFS_NR_COMPRESS_TYPES - 1);
252
253
254
		return -EUCLEAN;
	}
	if (btrfs_file_extent_encryption(leaf, fi)) {
255
		file_extent_err(leaf, slot,
256
257
			"invalid encryption for file extent, have %u expect 0",
			btrfs_file_extent_encryption(leaf, fi));
258
259
260
261
262
		return -EUCLEAN;
	}
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		/* Inline extent must have 0 as key offset */
		if (key->offset) {
263
			file_extent_err(leaf, slot,
264
265
		"invalid file_offset for inline file extent, have %llu expect 0",
				key->offset);
266
267
268
269
270
271
272
273
274
275
276
			return -EUCLEAN;
		}

		/* Compressed inline extent has no on-disk size, skip it */
		if (btrfs_file_extent_compression(leaf, fi) !=
		    BTRFS_COMPRESS_NONE)
			return 0;

		/* Uncompressed inline extent size must match item size */
		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
		    btrfs_file_extent_ram_bytes(leaf, fi)) {
277
			file_extent_err(leaf, slot,
278
279
280
	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
				btrfs_file_extent_ram_bytes(leaf, fi));
281
282
283
284
285
286
287
			return -EUCLEAN;
		}
		return 0;
	}

	/* Regular or preallocated extent has fixed item size */
	if (item_size != sizeof(*fi)) {
288
		file_extent_err(leaf, slot,
289
	"invalid item size for reg/prealloc file extent, have %u expect %zu",
290
			item_size, sizeof(*fi));
291
292
		return -EUCLEAN;
	}
293
294
295
296
297
	if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
	    CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
298
		return -EUCLEAN;
299

300
301
302
303
304
305
306
307
308
309
	/* Catch extent end overflow */
	if (check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
			       key->offset, &extent_end)) {
		file_extent_err(leaf, slot,
	"extent end overflow, have file offset %llu extent num bytes %llu",
				key->offset,
				btrfs_file_extent_num_bytes(leaf, fi));
		return -EUCLEAN;
	}

310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
	/*
	 * Check that no two consecutive file extent items, in the same leaf,
	 * present ranges that overlap each other.
	 */
	if (slot > 0 &&
	    prev_key->objectid == key->objectid &&
	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
		struct btrfs_file_extent_item *prev_fi;
		u64 prev_end;

		prev_fi = btrfs_item_ptr(leaf, slot - 1,
					 struct btrfs_file_extent_item);
		prev_end = file_extent_end(leaf, prev_key, prev_fi);
		if (prev_end > key->offset) {
			file_extent_err(leaf, slot - 1,
"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
					prev_end, key->offset);
			return -EUCLEAN;
		}
	}

331
332
333
	return 0;
}

334
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
335
			   int slot, struct btrfs_key *prev_key)
336
{
337
	struct btrfs_fs_info *fs_info = leaf->fs_info;
338
339
	u32 sectorsize = fs_info->sectorsize;
	u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
340
341

	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
342
		generic_err(leaf, slot,
343
344
		"invalid key objectid for csum item, have %llu expect %llu",
			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
345
346
347
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(key->offset, sectorsize)) {
348
		generic_err(leaf, slot,
349
350
	"unaligned key offset for csum item, have %llu should be aligned to %u",
			key->offset, sectorsize);
351
352
353
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
354
		generic_err(leaf, slot,
355
356
	"unaligned item size for csum item, have %u should be aligned to %u",
			btrfs_item_size_nr(leaf, slot), csumsize);
357
358
		return -EUCLEAN;
	}
359
360
361
362
363
364
365
366
367
368
369
370
371
372
	if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
		u64 prev_csum_end;
		u32 prev_item_size;

		prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
		prev_csum_end = (prev_item_size / csumsize) * sectorsize;
		prev_csum_end += prev_key->offset;
		if (prev_csum_end > key->offset) {
			generic_err(leaf, slot - 1,
"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
				    prev_csum_end, key->offset);
			return -EUCLEAN;
		}
	}
373
374
375
	return 0;
}

376
static int check_dir_item(struct extent_buffer *leaf,
377
378
			  struct btrfs_key *key, struct btrfs_key *prev_key,
			  int slot)
379
{
380
	struct btrfs_fs_info *fs_info = leaf->fs_info;
381
382
383
384
	struct btrfs_dir_item *di;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u32 cur = 0;

385
	if (!check_prev_ino(leaf, key, slot, prev_key))
386
		return -EUCLEAN;
387
388
389
390
391
392
393
394
395
396
397
	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
	while (cur < item_size) {
		u32 name_len;
		u32 data_len;
		u32 max_name_len;
		u32 total_size;
		u32 name_hash;
		u8 dir_type;

		/* header itself should not cross item boundary */
		if (cur + sizeof(*di) > item_size) {
398
			dir_item_err(leaf, slot,
399
		"dir item header crosses item boundary, have %zu boundary %u",
400
401
402
403
404
405
406
				cur + sizeof(*di), item_size);
			return -EUCLEAN;
		}

		/* dir type check */
		dir_type = btrfs_dir_type(leaf, di);
		if (dir_type >= BTRFS_FT_MAX) {
407
			dir_item_err(leaf, slot,
408
409
410
411
412
413
414
			"invalid dir item type, have %u expect [0, %u)",
				dir_type, BTRFS_FT_MAX);
			return -EUCLEAN;
		}

		if (key->type == BTRFS_XATTR_ITEM_KEY &&
		    dir_type != BTRFS_FT_XATTR) {
415
			dir_item_err(leaf, slot,
416
417
418
419
420
421
		"invalid dir item type for XATTR key, have %u expect %u",
				dir_type, BTRFS_FT_XATTR);
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR &&
		    key->type != BTRFS_XATTR_ITEM_KEY) {
422
			dir_item_err(leaf, slot,
423
424
425
426
427
428
429
430
431
432
433
434
			"xattr dir type found for non-XATTR key");
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR)
			max_name_len = XATTR_NAME_MAX;
		else
			max_name_len = BTRFS_NAME_LEN;

		/* Name/data length check */
		name_len = btrfs_dir_name_len(leaf, di);
		data_len = btrfs_dir_data_len(leaf, di);
		if (name_len > max_name_len) {
435
			dir_item_err(leaf, slot,
436
437
438
439
			"dir item name len too long, have %u max %u",
				name_len, max_name_len);
			return -EUCLEAN;
		}
440
		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
441
			dir_item_err(leaf, slot,
442
443
			"dir item name and data len too long, have %u max %u",
				name_len + data_len,
444
				BTRFS_MAX_XATTR_SIZE(fs_info));
445
446
447
448
			return -EUCLEAN;
		}

		if (data_len && dir_type != BTRFS_FT_XATTR) {
449
			dir_item_err(leaf, slot,
450
451
452
453
454
455
456
457
458
			"dir item with invalid data len, have %u expect 0",
				data_len);
			return -EUCLEAN;
		}

		total_size = sizeof(*di) + name_len + data_len;

		/* header and name/data should not cross item boundary */
		if (cur + total_size > item_size) {
459
			dir_item_err(leaf, slot,
460
461
462
463
464
465
466
467
468
469
470
		"dir item data crosses item boundary, have %u boundary %u",
				cur + total_size, item_size);
			return -EUCLEAN;
		}

		/*
		 * Special check for XATTR/DIR_ITEM, as key->offset is name
		 * hash, should match its name
		 */
		if (key->type == BTRFS_DIR_ITEM_KEY ||
		    key->type == BTRFS_XATTR_ITEM_KEY) {
471
472
			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];

473
474
475
476
			read_extent_buffer(leaf, namebuf,
					(unsigned long)(di + 1), name_len);
			name_hash = btrfs_name_hash(namebuf, name_len);
			if (key->offset != name_hash) {
477
				dir_item_err(leaf, slot,
478
479
480
481
482
483
484
485
486
487
488
		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
					name_hash, key->offset);
				return -EUCLEAN;
			}
		}
		cur += total_size;
		di = (struct btrfs_dir_item *)((void *)di + total_size);
	}
	return 0;
}

489
__printf(3, 4)
490
__cold
491
static void block_group_err(const struct extent_buffer *eb, int slot,
492
493
			    const char *fmt, ...)
{
494
	const struct btrfs_fs_info *fs_info = eb->fs_info;
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
	va_end(args);
}

513
static int check_block_group_item(struct extent_buffer *leaf,
514
515
516
517
518
519
520
521
522
				  struct btrfs_key *key, int slot)
{
	struct btrfs_block_group_item bgi;
	u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 type;

	/*
	 * Here we don't really care about alignment since extent allocator can
523
	 * handle it.  We care more about the size.
524
	 */
525
	if (key->offset == 0) {
526
		block_group_err(leaf, slot,
527
				"invalid block group size 0");
528
529
530
531
		return -EUCLEAN;
	}

	if (item_size != sizeof(bgi)) {
532
		block_group_err(leaf, slot,
533
534
535
536
537
538
539
			"invalid item size, have %u expect %zu",
				item_size, sizeof(bgi));
		return -EUCLEAN;
	}

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
540
	if (btrfs_stack_block_group_chunk_objectid(&bgi) !=
541
	    BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
542
		block_group_err(leaf, slot,
543
		"invalid block group chunk objectid, have %llu expect %llu",
544
				btrfs_stack_block_group_chunk_objectid(&bgi),
545
546
547
548
				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
		return -EUCLEAN;
	}

549
	if (btrfs_stack_block_group_used(&bgi) > key->offset) {
550
		block_group_err(leaf, slot,
551
			"invalid block group used, have %llu expect [0, %llu)",
552
				btrfs_stack_block_group_used(&bgi), key->offset);
553
554
555
		return -EUCLEAN;
	}

556
	flags = btrfs_stack_block_group_flags(&bgi);
557
	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
558
		block_group_err(leaf, slot,
559
560
561
562
563
564
565
566
567
568
569
570
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
		return -EUCLEAN;
	}

	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	if (type != BTRFS_BLOCK_GROUP_DATA &&
	    type != BTRFS_BLOCK_GROUP_METADATA &&
	    type != BTRFS_BLOCK_GROUP_SYSTEM &&
	    type != (BTRFS_BLOCK_GROUP_METADATA |
			   BTRFS_BLOCK_GROUP_DATA)) {
571
		block_group_err(leaf, slot,
572
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
573
574
575
576
577
578
579
			type, hweight64(type),
			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
			BTRFS_BLOCK_GROUP_SYSTEM,
			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
		return -EUCLEAN;
	}
	return 0;
580
581
}

582
__printf(4, 5)
583
__cold
584
static void chunk_err(const struct extent_buffer *leaf,
585
586
587
		      const struct btrfs_chunk *chunk, u64 logical,
		      const char *fmt, ...)
{
588
	const struct btrfs_fs_info *fs_info = leaf->fs_info;
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
	bool is_sb;
	struct va_format vaf;
	va_list args;
	int i;
	int slot = -1;

	/* Only superblock eb is able to have such small offset */
	is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);

	if (!is_sb) {
		/*
		 * Get the slot number by iterating through all slots, this
		 * would provide better readability.
		 */
		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
			if (btrfs_item_ptr_offset(leaf, i) ==
					(unsigned long)chunk) {
				slot = i;
				break;
			}
		}
	}
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (is_sb)
		btrfs_crit(fs_info,
		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
			   logical, &vaf);
	else
		btrfs_crit(fs_info,
	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
			   logical, &vaf);
	va_end(args);
}

627
628
629
/*
 * The common chunk check which could also work on super block sys chunk array.
 *
630
 * Return -EUCLEAN if anything is corrupted.
631
632
 * Return 0 if everything is OK.
 */
633
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
634
635
			    struct btrfs_chunk *chunk, u64 logical)
{
636
	struct btrfs_fs_info *fs_info = leaf->fs_info;
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
	u64 length;
	u64 stripe_len;
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
	u64 features;
	bool mixed = false;

	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);

	if (!num_stripes) {
652
		chunk_err(leaf, chunk, logical,
653
			  "invalid chunk num_stripes, have %u", num_stripes);
654
		return -EUCLEAN;
655
656
	}
	if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
657
		chunk_err(leaf, chunk, logical,
658
659
		"invalid chunk logical, have %llu should aligned to %u",
			  logical, fs_info->sectorsize);
660
		return -EUCLEAN;
661
662
	}
	if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
663
		chunk_err(leaf, chunk, logical,
664
665
666
			  "invalid chunk sectorsize, have %u expect %u",
			  btrfs_chunk_sector_size(leaf, chunk),
			  fs_info->sectorsize);
667
		return -EUCLEAN;
668
669
	}
	if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
670
		chunk_err(leaf, chunk, logical,
671
			  "invalid chunk length, have %llu", length);
672
		return -EUCLEAN;
673
674
	}
	if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
675
		chunk_err(leaf, chunk, logical,
676
			  "invalid chunk stripe length: %llu",
677
			  stripe_len);
678
		return -EUCLEAN;
679
680
681
	}
	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
	    type) {
682
		chunk_err(leaf, chunk, logical,
683
			  "unrecognized chunk type: 0x%llx",
684
685
686
			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
			  btrfs_chunk_type(leaf, chunk));
687
		return -EUCLEAN;
688
689
	}

690
	if (!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
691
	    (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
692
		chunk_err(leaf, chunk, logical,
693
694
695
696
		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EUCLEAN;
	}
697
	if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
698
		chunk_err(leaf, chunk, logical,
699
700
	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
701
		return -EUCLEAN;
702
703
704
705
	}

	if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
	    (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
706
		chunk_err(leaf, chunk, logical,
707
708
			  "system chunk with data or metadata type: 0x%llx",
			  type);
709
		return -EUCLEAN;
710
711
712
713
714
715
716
717
718
	}

	features = btrfs_super_incompat_flags(fs_info->super_copy);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = true;

	if (!mixed) {
		if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
		    (type & BTRFS_BLOCK_GROUP_DATA)) {
719
			chunk_err(leaf, chunk, logical,
720
			"mixed chunk type in non-mixed mode: 0x%llx", type);
721
			return -EUCLEAN;
722
723
724
725
726
727
728
729
730
		}
	}

	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
731
		chunk_err(leaf, chunk, logical,
732
733
734
			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
			num_stripes, sub_stripes,
			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
735
		return -EUCLEAN;
736
737
738
	}

	return 0;
739
740
}

741
__printf(3, 4)
742
__cold
743
static void dev_item_err(const struct extent_buffer *eb, int slot,
744
745
746
747
748
749
750
751
752
753
754
755
			 const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

756
	btrfs_crit(eb->fs_info,
757
758
759
760
761
762
763
	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
	va_end(args);
}

764
static int check_dev_item(struct extent_buffer *leaf,
765
766
767
768
769
			  struct btrfs_key *key, int slot)
{
	struct btrfs_dev_item *ditem;

	if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
770
		dev_item_err(leaf, slot,
771
772
773
774
775
776
			     "invalid objectid: has=%llu expect=%llu",
			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
		return -EUCLEAN;
	}
	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
	if (btrfs_device_id(leaf, ditem) != key->offset) {
777
		dev_item_err(leaf, slot,
778
779
780
781
782
783
784
785
786
787
788
789
			     "devid mismatch: key has=%llu item has=%llu",
			     key->offset, btrfs_device_id(leaf, ditem));
		return -EUCLEAN;
	}

	/*
	 * For device total_bytes, we don't have reliable way to check it, as
	 * it can be 0 for device removal. Device size check can only be done
	 * by dev extents check.
	 */
	if (btrfs_device_bytes_used(leaf, ditem) >
	    btrfs_device_total_bytes(leaf, ditem)) {
790
		dev_item_err(leaf, slot,
791
792
793
794
795
796
797
798
799
800
801
802
			     "invalid bytes used: have %llu expect [0, %llu]",
			     btrfs_device_bytes_used(leaf, ditem),
			     btrfs_device_total_bytes(leaf, ditem));
		return -EUCLEAN;
	}
	/*
	 * Remaining members like io_align/type/gen/dev_group aren't really
	 * utilized.  Skip them to make later usage of them easier.
	 */
	return 0;
}

803
804
/* Inode item error output has the same format as dir_item_err() */
#define inode_item_err(fs_info, eb, slot, fmt, ...)			\
805
	dir_item_err(eb, slot, fmt, __VA_ARGS__)
806

807
static int check_inode_item(struct extent_buffer *leaf,
808
809
			    struct btrfs_key *key, int slot)
{
810
	struct btrfs_fs_info *fs_info = leaf->fs_info;
811
812
813
814
815
816
817
818
819
	struct btrfs_inode_item *iitem;
	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
	u32 mode;

	if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
	     key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
	    key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
	    key->objectid != BTRFS_FREE_INO_OBJECTID) {
820
		generic_err(leaf, slot,
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
			    key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
			    BTRFS_FIRST_FREE_OBJECTID,
			    BTRFS_LAST_FREE_OBJECTID,
			    BTRFS_FREE_INO_OBJECTID);
		return -EUCLEAN;
	}
	if (key->offset != 0) {
		inode_item_err(fs_info, leaf, slot,
			"invalid key offset: has %llu expect 0",
			key->offset);
		return -EUCLEAN;
	}
	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);

	/* Here we use super block generation + 1 to handle log tree */
	if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
		inode_item_err(fs_info, leaf, slot,
			"invalid inode generation: has %llu expect (0, %llu]",
			       btrfs_inode_generation(leaf, iitem),
			       super_gen + 1);
		return -EUCLEAN;
	}
	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
	if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
		inode_item_err(fs_info, leaf, slot,
			"invalid inode generation: has %llu expect [0, %llu]",
			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
		return -EUCLEAN;
	}

	/*
	 * For size and nbytes it's better not to be too strict, as for dir
	 * item its size/nbytes can easily get wrong, but doesn't affect
	 * anything in the fs. So here we skip the check.
	 */
	mode = btrfs_inode_mode(leaf, iitem);
	if (mode & ~valid_mask) {
		inode_item_err(fs_info, leaf, slot,
			       "unknown mode bit detected: 0x%x",
			       mode & ~valid_mask);
		return -EUCLEAN;
	}

	/*
866
867
868
	 * S_IFMT is not bit mapped so we can't completely rely on
	 * is_power_of_2/has_single_bit_set, but it can save us from checking
	 * FIFO/CHR/DIR/REG.  Only needs to check BLK, LNK and SOCKS
869
	 */
870
	if (!has_single_bit_set(mode & S_IFMT)) {
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
		if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
			inode_item_err(fs_info, leaf, slot,
			"invalid mode: has 0%o expect valid S_IF* bit(s)",
				       mode & S_IFMT);
			return -EUCLEAN;
		}
	}
	if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
		inode_item_err(fs_info, leaf, slot,
		       "invalid nlink: has %u expect no more than 1 for dir",
			btrfs_inode_nlink(leaf, iitem));
		return -EUCLEAN;
	}
	if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
		inode_item_err(fs_info, leaf, slot,
			       "unknown flags detected: 0x%llx",
			       btrfs_inode_flags(leaf, iitem) &
			       ~BTRFS_INODE_FLAG_MASK);
		return -EUCLEAN;
	}
	return 0;
}

894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_root_item ri;
	const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
				     BTRFS_ROOT_SUBVOL_DEAD;

	/* No such tree id */
	if (key->objectid == 0) {
		generic_err(leaf, slot, "invalid root id 0");
		return -EUCLEAN;
	}

	/*
	 * Some older kernel may create ROOT_ITEM with non-zero offset, so here
	 * we only check offset for reloc tree whose key->offset must be a
	 * valid tree.
	 */
	if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
		return -EUCLEAN;
	}

	if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
		generic_err(leaf, slot,
			    "invalid root item size, have %u expect %zu",
			    btrfs_item_size_nr(leaf, slot), sizeof(ri));
	}

	read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(ri));

	/* Generation related */
	if (btrfs_root_generation(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
			"invalid root generation, have %llu expect (0, %llu]",
			    btrfs_root_generation(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
	if (btrfs_root_generation_v2(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
		"invalid root v2 generation, have %llu expect (0, %llu]",
			    btrfs_root_generation_v2(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
	if (btrfs_root_last_snapshot(&ri) >
	    btrfs_super_generation(fs_info->super_copy) + 1) {
		generic_err(leaf, slot,
		"invalid root last_snapshot, have %llu expect (0, %llu]",
			    btrfs_root_last_snapshot(&ri),
			    btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}

	/* Alignment and level check */
	if (!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize)) {
		generic_err(leaf, slot,
		"invalid root bytenr, have %llu expect to be aligned to %u",
			    btrfs_root_bytenr(&ri), fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL) {
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
			    btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}
	if (ri.drop_level >= BTRFS_MAX_LEVEL) {
		generic_err(leaf, slot,
			    "invalid root level, have %u expect [0, %u]",
			    ri.drop_level, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}

	/* Flags check */
	if (btrfs_root_flags(&ri) & ~valid_root_flags) {
		generic_err(leaf, slot,
			    "invalid root flags, have 0x%llx expect mask 0x%llx",
			    btrfs_root_flags(&ri), valid_root_flags);
		return -EUCLEAN;
	}
	return 0;
}

983
984
985
986
987
988
989
990
991
992
993
994
995
__printf(3,4)
__cold
static void extent_err(const struct extent_buffer *eb, int slot,
		       const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;
	u64 bytenr;
	u64 len;

	btrfs_item_key_to_cpu(eb, &key, slot);
	bytenr = key.objectid;
996
997
998
	if (key.type == BTRFS_METADATA_ITEM_KEY ||
	    key.type == BTRFS_TREE_BLOCK_REF_KEY ||
	    key.type == BTRFS_SHARED_BLOCK_REF_KEY)
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
		len = eb->fs_info->nodesize;
	else
		len = key.offset;
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(eb->fs_info,
	"corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		eb->start, slot, bytenr, len, &vaf);
	va_end(args);
}

static int check_extent_item(struct extent_buffer *leaf,
			     struct btrfs_key *key, int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_extent_item *ei;
	bool is_tree_block = false;
	unsigned long ptr;	/* Current pointer inside inline refs */
	unsigned long end;	/* Extent item end */
	const u32 item_size = btrfs_item_size_nr(leaf, slot);
	u64 flags;
	u64 generation;
	u64 total_refs;		/* Total refs in btrfs_extent_item */
	u64 inline_refs = 0;	/* found total inline refs */

	if (key->type == BTRFS_METADATA_ITEM_KEY &&
	    !btrfs_fs_incompat(fs_info, SKINNY_METADATA)) {
		generic_err(leaf, slot,
"invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled");
		return -EUCLEAN;
	}
	/* key->objectid is the bytenr for both key types */
	if (!IS_ALIGNED(key->objectid, fs_info->sectorsize)) {
		generic_err(leaf, slot,
		"invalid key objectid, have %llu expect to be aligned to %u",
			   key->objectid, fs_info->sectorsize);
		return -EUCLEAN;
	}

	/* key->offset is tree level for METADATA_ITEM_KEY */
	if (key->type == BTRFS_METADATA_ITEM_KEY &&
	    key->offset >= BTRFS_MAX_LEVEL) {
		extent_err(leaf, slot,
			   "invalid tree level, have %llu expect [0, %u]",
			   key->offset, BTRFS_MAX_LEVEL - 1);
		return -EUCLEAN;
	}

	/*
	 * EXTENT/METADATA_ITEM consists of:
	 * 1) One btrfs_extent_item
	 *    Records the total refs, type and generation of the extent.
	 *
	 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only)
	 *    Records the first key and level of the tree block.
	 *
	 * 2) Zero or more btrfs_extent_inline_ref(s)
	 *    Each inline ref has one btrfs_extent_inline_ref shows:
	 *    2.1) The ref type, one of the 4
	 *         TREE_BLOCK_REF	Tree block only
	 *         SHARED_BLOCK_REF	Tree block only
	 *         EXTENT_DATA_REF	Data only
	 *         SHARED_DATA_REF	Data only
	 *    2.2) Ref type specific data
	 *         Either using btrfs_extent_inline_ref::offset, or specific
	 *         data structure.
	 */
	if (item_size < sizeof(*ei)) {
		extent_err(leaf, slot,
			   "invalid item size, have %u expect [%zu, %u)",
			   item_size, sizeof(*ei),
			   BTRFS_LEAF_DATA_SIZE(fs_info));
		return -EUCLEAN;
	}
	end = item_size + btrfs_item_ptr_offset(leaf, slot);

	/* Checks against extent_item */
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);
	total_refs = btrfs_extent_refs(leaf, ei);
	generation = btrfs_extent_generation(leaf, ei);
	if (generation > btrfs_super_generation(fs_info->super_copy) + 1) {
		extent_err(leaf, slot,
			   "invalid generation, have %llu expect (0, %llu]",
			   generation,
			   btrfs_super_generation(fs_info->super_copy) + 1);
		return -EUCLEAN;
	}
1091
1092
	if (!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
					 BTRFS_EXTENT_FLAG_TREE_BLOCK))) {
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
		extent_err(leaf, slot,
		"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
			flags, BTRFS_EXTENT_FLAG_DATA |
			BTRFS_EXTENT_FLAG_TREE_BLOCK);
		return -EUCLEAN;
	}
	is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK);
	if (is_tree_block) {
		if (key->type == BTRFS_EXTENT_ITEM_KEY &&
		    key->offset != fs_info->nodesize) {
			extent_err(leaf, slot,
				   "invalid extent length, have %llu expect %u",
				   key->offset, fs_info->nodesize);
			return -EUCLEAN;
		}
	} else {
		if (key->type != BTRFS_EXTENT_ITEM_KEY) {
			extent_err(leaf, slot,
			"invalid key type, have %u expect %u for data backref",
				   key->type, BTRFS_EXTENT_ITEM_KEY);
			return -EUCLEAN;
		}
		if (!IS_ALIGNED(key->offset, fs_info->sectorsize)) {
			extent_err(leaf, slot,
			"invalid extent length, have %llu expect aligned to %u",
				   key->offset, fs_info->sectorsize);
			return -EUCLEAN;
		}
	}
	ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1);

	/* Check the special case of btrfs_tree_block_info */
	if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) {
		struct btrfs_tree_block_info *info;

		info = (struct btrfs_tree_block_info *)ptr;
		if (btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL) {
			extent_err(leaf, slot,
			"invalid tree block info level, have %u expect [0, %u]",
				   btrfs_tree_block_level(leaf, info),
				   BTRFS_MAX_LEVEL - 1);
			return -EUCLEAN;
		}
		ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1);
	}

	/* Check inline refs */
	while (ptr < end) {
		struct btrfs_extent_inline_ref *iref;
		struct btrfs_extent_data_ref *dref;
		struct btrfs_shared_data_ref *sref;
		u64 dref_offset;
		u64 inline_offset;
		u8 inline_type;

		if (ptr + sizeof(*iref) > end) {
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %zu end %lu",
				   ptr, sizeof(*iref), end);
			return -EUCLEAN;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		inline_type = btrfs_extent_inline_ref_type(leaf, iref);
		inline_offset = btrfs_extent_inline_ref_offset(leaf, iref);
		if (ptr + btrfs_extent_inline_ref_size(inline_type) > end) {
			extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
				   ptr, inline_type, end);
			return -EUCLEAN;
		}

		switch (inline_type) {
		/* inline_offset is subvolid of the owner, no need to check */
		case BTRFS_TREE_BLOCK_REF_KEY:
			inline_refs++;
			break;
		/* Contains parent bytenr */
		case BTRFS_SHARED_BLOCK_REF_KEY:
			if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs++;
			break;
		/*
		 * Contains owner subvolid, owner key objectid, adjusted offset.
		 * The only obvious corruption can happen in that offset.
		 */
		case BTRFS_EXTENT_DATA_REF_KEY:
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			dref_offset = btrfs_extent_data_ref_offset(leaf, dref);
			if (!IS_ALIGNED(dref_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid data ref offset, have %llu expect aligned to %u",
					   dref_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_extent_data_ref_count(leaf, dref);
			break;
		/* Contains parent bytenr and ref count */
		case BTRFS_SHARED_DATA_REF_KEY:
			sref = (struct btrfs_shared_data_ref *)(iref + 1);
			if (!IS_ALIGNED(inline_offset, fs_info->sectorsize)) {
				extent_err(leaf, slot,
		"invalid data parent bytenr, have %llu expect aligned to %u",
					   inline_offset, fs_info->sectorsize);
				return -EUCLEAN;
			}
			inline_refs += btrfs_shared_data_ref_count(leaf, sref);
			break;
		default:
			extent_err(leaf, slot, "unknown inline ref type: %u",
				   inline_type);
			return -EUCLEAN;
		}
		ptr += btrfs_extent_inline_ref_size(inline_type);
	}
	/* No padding is allowed */
	if (ptr != end) {
		extent_err(leaf, slot,
			   "invalid extent item size, padding bytes found");
		return -EUCLEAN;
	}

	/* Finally, check the inline refs against total refs */
	if (inline_refs > total_refs) {
		extent_err(leaf, slot,
			"invalid extent refs, have %llu expect >= inline %llu",
			   total_refs, inline_refs);
		return -EUCLEAN;
	}
	return 0;
}

1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
static int check_simple_keyed_refs(struct extent_buffer *leaf,
				   struct btrfs_key *key, int slot)
{
	u32 expect_item_size = 0;

	if (key->type == BTRFS_SHARED_DATA_REF_KEY)
		expect_item_size = sizeof(struct btrfs_shared_data_ref);

	if (btrfs_item_size_nr(leaf, slot) != expect_item_size) {
		generic_err(leaf, slot,
		"invalid item size, have %u expect %u for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    expect_item_size, key->type);
		return -EUCLEAN;
	}
	if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (key->type != BTRFS_TREE_BLOCK_REF_KEY &&
	    !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize)) {
		extent_err(leaf, slot,
		"invalid tree parent bytenr, have %llu expect aligned to %u",
			   key->offset, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	return 0;
}

1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
static int check_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_key *key, int slot)
{
	struct btrfs_extent_data_ref *dref;
	unsigned long ptr = btrfs_item_ptr_offset(leaf, slot);
	const unsigned long end = ptr + btrfs_item_size_nr(leaf, slot);

	if (btrfs_item_size_nr(leaf, slot) % sizeof(*dref) != 0) {
		generic_err(leaf, slot,
	"invalid item size, have %u expect aligned to %zu for key type %u",
			    btrfs_item_size_nr(leaf, slot),
			    sizeof(*dref), key->type);
	}
	if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
		generic_err(leaf, slot,
"invalid key objectid for shared block ref, have %llu expect aligned to %u",
			    key->objectid, leaf->fs_info->sectorsize);
		return -EUCLEAN;
	}
	for (; ptr < end; ptr += sizeof(*dref)) {
		u64 root_objectid;
		u64 owner;
		u64 offset;
		u64 hash;

		dref = (struct btrfs_extent_data_ref *)ptr;
		root_objectid = btrfs_extent_data_ref_root(leaf, dref);
		owner = btrfs_extent_data_ref_objectid(leaf, dref);
		offset = btrfs_extent_data_ref_offset(leaf, dref);
		hash = hash_extent_data_ref(root_objectid, owner, offset);
		if (hash != key->offset) {
			extent_err(leaf, slot,
	"invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
				   hash, key->offset);
			return -EUCLEAN;
		}
		if (!IS_ALIGNED(offset, leaf->fs_info->sectorsize)) {
			extent_err(leaf, slot,
	"invalid extent data backref offset, have %llu expect aligned to %u",
				   offset, leaf->fs_info->sectorsize);
		}
	}
	return 0;
}

1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
#define inode_ref_err(fs_info, eb, slot, fmt, args...)			\
	inode_item_err(fs_info, eb, slot, fmt, ##args)
static int check_inode_ref(struct extent_buffer *leaf,
			   struct btrfs_key *key, struct btrfs_key *prev_key,
			   int slot)
{
	struct btrfs_inode_ref *iref;
	unsigned long ptr;
	unsigned long end;

1315
1316
	if (!check_prev_ino(leaf, key, slot, prev_key))
		return -EUCLEAN;
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
	/* namelen can't be 0, so item_size == sizeof() is also invalid */
	if (btrfs_item_size_nr(leaf, slot) <= sizeof(*iref)) {
		inode_ref_err(fs_info, leaf, slot,
			"invalid item size, have %u expect (%zu, %u)",
			btrfs_item_size_nr(leaf, slot),
			sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
		return -EUCLEAN;
	}

	ptr = btrfs_item_ptr_offset(leaf, slot);
	end = ptr + btrfs_item_size_nr(leaf, slot);
	while (ptr < end) {
		u16 namelen;

		if (ptr + sizeof(iref) > end) {
			inode_ref_err(fs_info, leaf, slot,
			"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
				ptr, end, sizeof(iref));
			return -EUCLEAN;
		}

		iref = (struct btrfs_inode_ref *)ptr;
		namelen = btrfs_inode_ref_name_len(leaf, iref);
		if (ptr + sizeof(*iref) + namelen > end) {
			inode_ref_err(fs_info, leaf, slot,
				"inode ref overflow, ptr %lu end %lu namelen %u",
				ptr, end, namelen);
			return -EUCLEAN;
		}

		/*
		 * NOTE: In theory we should record all found index numbers
		 * to find any duplicated indexes, but that will be too time
		 * consuming for inodes with too many hard links.
		 */
		ptr += sizeof(*iref) + namelen;
	}
	return 0;
}

1357
1358
1359
/*
 * Common point to switch the item-specific validation.
 */
1360
static int check_leaf_item(struct extent_buffer *leaf,
1361
1362
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
1363
1364
{
	int ret = 0;
1365
	struct btrfs_chunk *chunk;
1366
1367
1368

	switch (key->type) {
	case BTRFS_EXTENT_DATA_KEY:
1369
		ret = check_extent_data_item(leaf, key, slot, prev_key);
1370
1371
		break;
	case BTRFS_EXTENT_CSUM_KEY:
1372
		ret = check_csum_item(leaf, key, slot, prev_key);
1373
		break;
1374
1375
1376
	case BTRFS_DIR_ITEM_KEY:
	case BTRFS_DIR_INDEX_KEY:
	case BTRFS_XATTR_ITEM_KEY:
1377
		ret = check_dir_item(leaf, key, prev_key, slot);
1378
		break;
1379
1380
1381
	case BTRFS_INODE_REF_KEY:
		ret = check_inode_ref(leaf, key, prev_key, slot);
		break;
1382
	case BTRFS_BLOCK_GROUP_ITEM_KEY:
1383
		ret = check_block_group_item(leaf, key, slot);
1384
		break;
1385
1386
	case BTRFS_CHUNK_ITEM_KEY:
		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
1387
		ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
1388
		break;
1389
	case BTRFS_DEV_ITEM_KEY:
1390
		ret = check_dev_item(leaf, key, slot);
1391
		break;
1392
	case BTRFS_INODE_ITEM_KEY:
1393
		ret = check_inode_item(leaf, key, slot);
1394
		break;
1395
1396
1397
	case BTRFS_ROOT_ITEM_KEY:
		ret = check_root_item(leaf, key, slot);
		break;
1398
1399
1400
1401
	case BTRFS_EXTENT_ITEM_KEY:
	case BTRFS_METADATA_ITEM_KEY:
		ret = check_extent_item(leaf, key, slot);
		break;
1402
1403
1404
1405
1406
	case BTRFS_TREE_BLOCK_REF_KEY:
	case BTRFS_SHARED_DATA_REF_KEY:
	case BTRFS_SHARED_BLOCK_REF_KEY:
		ret = check_simple_keyed_refs(leaf, key, slot);
		break;
1407
1408
1409
	case BTRFS_EXTENT_DATA_REF_KEY:
		ret = check_extent_data_ref(leaf, key, slot);
		break;
1410
1411
1412
1413
	}
	return ret;
}

1414
static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
1415
{
1416
	struct btrfs_fs_info *fs_info = leaf->fs_info;
1417
1418
1419
1420
1421
1422
	/* No valid key type is 0, so all key should be larger than this key */
	struct btrfs_key prev_key = {0, 0, 0};
	struct btrfs_key key;
	u32 nritems = btrfs_header_nritems(leaf);
	int slot;

1423
	if (btrfs_header_level(leaf) != 0) {
1424
		generic_err(leaf, 0,
1425
1426
1427
1428
1429
			"invalid level for leaf, have %d expect 0",
			btrfs_header_level(leaf));
		return -EUCLEAN;
	}

1430
1431
1432
1433
1434
1435
1436
1437
1438
	/*
	 * Extent buffers from a relocation tree have a owner field that
	 * corresponds to the subvolume tree they are based on. So just from an
	 * extent buffer alone we can not find out what is the id of the
	 * corresponding subvolume tree, so we can not figure out if the extent
	 * buffer corresponds to the root of the relocation tree or not. So
	 * skip this check for relocation trees.
	 */
	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
1439
		u64 owner = btrfs_header_owner(leaf);
1440

1441
1442
1443
1444
1445
1446
1447
		/* These trees must never be empty */
		if (owner == BTRFS_ROOT_TREE_OBJECTID ||
		    owner == BTRFS_CHUNK_TREE_OBJECTID ||
		    owner == BTRFS_EXTENT_TREE_OBJECTID ||
		    owner == BTRFS_DEV_TREE_OBJECTID ||
		    owner == BTRFS_FS_TREE_OBJECTID ||
		    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
1448
			generic_err(leaf, 0,
1449
1450
1451
1452
			"invalid root, root %llu must never be empty",
				    owner);
			return -EUCLEAN;
		}
1453
1454
1455
1456
1457
1458
		/* Unknown tree */
		if (owner == 0) {
			generic_err(leaf, 0,
				"invalid owner, root 0 is not defined");
			return -EUCLEAN;
		}
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
		return 0;
	}

	if (nritems == 0)
		return 0;

	/*
	 * Check the following things to make sure this is a good leaf, and
	 * leaf users won't need to bother with similar sanity checks:
	 *
	 * 1) key ordering
	 * 2) item offset and size
	 *    No overlap, no hole, all inside the leaf.
	 * 3) item content
	 *    If possible, do comprehensive sanity check.
	 *    NOTE: All checks must only rely on the item data itself.
	 */
	for (slot = 0; slot < nritems; slot++) {
		u32 item_end_expected;
		int ret;

		btrfs_item_key_to_cpu(leaf, &key, slot);

		/* Make sure the keys are in the right order */
		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
1484
			generic_err(leaf, slot,
1485
1486
1487
1488
	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
				prev_key.objectid, prev_key.type,
				prev_key.offset, key.objectid, key.type,
				key.offset);
1489