// SPDX-License-Identifier: GPL-2.0
/*
 *  Code extracted from drivers/block/genhd.c
 *  Copyright (C) 1991-1998  Linus Torvalds
 *  Re-organised Feb 1998 Russell King
 *
 *  We now have independent partition support from the
 *  block drivers, which allows all the partition code to
 *  be grouped in one location, and it to be mostly self
 *  contained.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/ctype.h>
#include <linux/genhd.h>
#include <linux/blktrace_api.h>

#include "partitions/check.h"

#ifdef CONFIG_BLK_DEV_MD
extern void md_autodetect_dev(dev_t dev);
#endif
 
/*
 * disk_name() is used by partition check code and the genhd driver.
 * It formats the devicename of the indicated disk into
 * the supplied buffer (of size at least 32), and returns
 * a pointer to that same buffer (for convenience).
 */

char *disk_name(struct gendisk *hd, int partno, char *buf)
{
	if (!partno)
		snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name);
	else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1]))
		snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno);
	else
		snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno);

	return buf;
}

const char *bdevname(struct block_device *bdev, char *buf)
{
	return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf);
}

EXPORT_SYMBOL(bdevname);

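/*
 * Like bdevname(), but formats the name from the bio's disk and
 * partition number instead of a struct block_device.
 */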
const char *bio_devname(struct bio *bio, char *buf)
{
	return disk_name(bio->bi_disk, bio->bi_partno, buf);
}
EXPORT_SYMBOL(bio_devname);

/*
 * There's very little reason to use this, you should really
 * have a struct block_device just about everywhere and use
 * bdevname() instead.
 */
const char *__bdevname(dev_t dev, char *buffer)
{
	scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)",
				MAJOR(dev), MINOR(dev));
	return buffer;
}

EXPORT_SYMBOL(__bdevname);

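/*
 * sysfs show helpers: each part_*_show() routine below backs one
 * read-only attribute of the partition's sysfs directory.
 */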
static ssize_t part_partition_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%d\n", p->partno);
}

static ssize_t part_start_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect);
}

ssize_t part_size_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
}

static ssize_t part_ro_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%d\n", p->policy ? 1 : 0);
}

static ssize_t part_alignment_offset_show(struct device *dev,
					  struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}

static ssize_t part_discard_alignment_show(struct device *dev,
					   struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	return sprintf(buf, "%u\n", p->discard_alignment);
}

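/*
 * Cumulative I/O statistics for the partition (ios/merges/sectors/msecs
 * for reads, writes, discards and flushes, plus inflight, io_ticks and
 * time_in_queue), in the same field order as /proc/diskstats.
 */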
ssize_t part_stat_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	struct request_queue *q = part_to_disk(p)->queue;
	unsigned int inflight;

	inflight = part_in_flight(q, p);
	return sprintf(buf,
		"%8lu %8lu %8llu %8u "
		"%8lu %8lu %8llu %8u "
		"%8u %8u %8u "
		"%8lu %8lu %8llu %8u "
		"%8lu %8u"
		"\n",
		part_stat_read(p, ios[STAT_READ]),
		part_stat_read(p, merges[STAT_READ]),
		(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
		(unsigned int)part_stat_read_msecs(p, STAT_READ),
		part_stat_read(p, ios[STAT_WRITE]),
		part_stat_read(p, merges[STAT_WRITE]),
		(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
		(unsigned int)part_stat_read_msecs(p, STAT_WRITE),
		inflight,
		jiffies_to_msecs(part_stat_read(p, io_ticks)),
		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
		part_stat_read(p, ios[STAT_DISCARD]),
		part_stat_read(p, merges[STAT_DISCARD]),
		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
		(unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
		part_stat_read(p, ios[STAT_FLUSH]),
		(unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
}

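/*
 * Requests currently in flight against this partition, reads and writes
 * reported separately.
 */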
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct hd_struct *p = dev_to_part(dev);
	struct request_queue *q = part_to_disk(p)->queue;
	unsigned int inflight[2];

	part_in_flight_rw(q, p, inflight);
	return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]);
}

#ifdef CONFIG_FAIL_MAKE_REQUEST
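/*
 * Fault injection: the make-it-fail attribute arms should_fail() based
 * error injection for I/O submitted to this partition.
 */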
ssize_t part_fail_show(struct device *dev,
		       struct device_attribute *attr, char *buf)
{
	struct hd_struct *p = dev_to_part(dev);

	return sprintf(buf, "%d\n", p->make_it_fail);
}

ssize_t part_fail_store(struct device *dev,
			struct device_attribute *attr,
			const char *buf, size_t count)
{
	struct hd_struct *p = dev_to_part(dev);
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		p->make_it_fail = (i == 0) ? 0 : 1;

	return count;
}
#endif

static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif

static struct attribute *part_attrs[] = {
	&dev_attr_partition.attr,
	&dev_attr_start.attr,
	&dev_attr_size.attr,
	&dev_attr_ro.attr,
	&dev_attr_alignment_offset.attr,
	&dev_attr_discard_alignment.attr,
	&dev_attr_stat.attr,
	&dev_attr_inflight.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
	&dev_attr_fail.attr,
#endif
	NULL
};

static struct attribute_group part_attr_group = {
	.attrs = part_attrs,
};

static const struct attribute_group *part_attr_groups[] = {
	&part_attr_group,
#ifdef CONFIG_BLK_DEV_IO_TRACE
	&blk_trace_attr_group,
#endif
	NULL
};

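/*
 * Device core release callback: runs once the last reference to the
 * partition's struct device is dropped and frees the device number,
 * the per-partition state and the hd_struct itself.
 */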
static void part_release(struct device *dev)
{
	struct hd_struct *p = dev_to_part(dev);
	blk_free_devt(dev->devt);
	hd_free_part(p);
	kfree(p);
}

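/*
 * Add PARTN and, when a volume name is known, PARTNAME to the uevent
 * environment so userspace (e.g. udev) can identify the partition.
 */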
static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	struct hd_struct *part = dev_to_part(dev);

	add_uevent_var(env, "PARTN=%u", part->partno);
	if (part->info && part->info->volname[0])
		add_uevent_var(env, "PARTNAME=%s", part->info->volname);
	return 0;
}

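/*
 * Device type shared by all partitions; ties together the sysfs
 * attributes, the uevent hook and the release handler above.
 */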
struct device_type part_type = {
	.name		= "partition",
	.groups		= part_attr_groups,
	.release	= part_release,
	.uevent		= part_uevent,
};

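/*
 * Partition teardown is deferred: __delete_partition() is the percpu_ref
 * release callback and queues delete_partition_work_fn() as RCU work, so
 * the final put_device() only happens after a grace period in which no
 * new lookups can reach the partition.
 */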
static void delete_partition_work_fn(struct work_struct *work)
{
	struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct,
					rcu_work);

	part->start_sect = 0;
	part->nr_sects = 0;
	part_stat_set_all(part, 0);
	put_device(part_to_dev(part));
}

void __delete_partition(struct percpu_ref *ref)
{
	struct hd_struct *part = container_of(ref, struct hd_struct, ref);

	INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn);
	queue_rcu_work(system_wq, &part->rcu_work);
}

/*
 * Must be called either with bd_mutex held, before a disk can be opened or
 * after all disk users are gone.
 */
void delete_partition(struct gendisk *disk, int partno)
{
	struct disk_part_tbl *ptbl =
		rcu_dereference_protected(disk->part_tbl, 1);
	struct hd_struct *part;

	if (partno >= ptbl->len)
		return;

	part = rcu_dereference_protected(ptbl->part[partno], 1);
	if (!part)
		return;

	rcu_assign_pointer(ptbl->part[partno], NULL);
	rcu_assign_pointer(ptbl->last_lookup, NULL);
	kobject_put(part->holder_dir);
	device_del(part_to_dev(part));

	/*
	 * Remove gendisk pointer from idr so that it cannot be looked up
	 * while RCU period before freeing gendisk is running to prevent
	 * use-after-free issues. Note that the device number stays
	 * "in-use" until we really free the gendisk.
	 */
	blk_invalidate_devt(part_devt(part));
	hd_struct_kill(part);
}

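/*
 * The whole_disk attribute is intentionally empty; its mere existence
 * tells userspace that this partition (created with
 * ADDPART_FLAG_WHOLEDISK) spans the entire disk.
 */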
static ssize_t whole_disk_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return 0;
}
static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);

/*
 * Must be called either with bd_mutex held, before a disk can be opened or
 * after all disk users are gone.
 */
struct hd_struct *add_partition(struct gendisk *disk, int partno,
				sector_t start, sector_t len, int flags,
				struct partition_meta_info *info)
{
	struct hd_struct *p;
	dev_t devt = MKDEV(0, 0);
	struct device *ddev = disk_to_dev(disk);
	struct device *pdev;
	struct disk_part_tbl *ptbl;
	const char *dname;
	int err;

	/*
	 * Partitions are not supported on zoned block devices that are used as
	 * such.
	 */
	switch (disk->queue->limits.zoned) {
	case BLK_ZONED_HM:
		pr_warn("%s: partitions not supported on host managed zoned block device\n",
			disk->disk_name);
		return ERR_PTR(-ENXIO);
	case BLK_ZONED_HA:
		pr_info("%s: disabling host aware zoned block device support due to partitions\n",
			disk->disk_name);
		disk->queue->limits.zoned = BLK_ZONED_NONE;
		break;
	case BLK_ZONED_NONE:
		break;
	}

	err = disk_expand_part_tbl(disk, partno);
	if (err)
		return ERR_PTR(err);
	ptbl = rcu_dereference_protected(disk->part_tbl, 1);

	if (ptbl->part[partno])
		return ERR_PTR(-EBUSY);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p)
		return ERR_PTR(-EBUSY);

	if (!init_part_stats(p)) {
		err = -ENOMEM;
		goto out_free;
	}

	seqcount_init(&p->nr_sects_seq);
	pdev = part_to_dev(p);

	p->start_sect = start;
	p->alignment_offset =
		queue_limit_alignment_offset(&disk->queue->limits, start);
	p->discard_alignment =
		queue_limit_discard_alignment(&disk->queue->limits, start);
	p->nr_sects = len;
	p->partno = partno;
	p->policy = get_disk_ro(disk);

	if (info) {
		struct partition_meta_info *pinfo = alloc_part_info(disk);
		if (!pinfo) {
			err = -ENOMEM;
			goto out_free_stats;
		}
		memcpy(pinfo, info, sizeof(*info));
		p->info = pinfo;
	}

	dname = dev_name(ddev);
	if (isdigit(dname[strlen(dname) - 1]))
		dev_set_name(pdev, "%sp%d", dname, partno);
	else
		dev_set_name(pdev, "%s%d", dname, partno);

	device_initialize(pdev);
	pdev->class = &block_class;
	pdev->type = &part_type;
	pdev->parent = ddev;

	err = blk_alloc_devt(p, &devt);
	if (err)
		goto out_free_info;
	pdev->devt = devt;

	/* delay uevent until 'holders' subdir is created */
	dev_set_uevent_suppress(pdev, 1);
	err = device_add(pdev);
	if (err)
		goto out_put;

	err = -ENOMEM;
	p->holder_dir = kobject_create_and_add("holders", &pdev->kobj);
	if (!p->holder_dir)
		goto out_del;

	dev_set_uevent_suppress(pdev, 0);
	if (flags & ADDPART_FLAG_WHOLEDISK) {
		err = device_create_file(pdev, &dev_attr_whole_disk);
		if (err)
			goto out_del;
	}

	err = hd_ref_init(p);
	if (err) {
		if (flags & ADDPART_FLAG_WHOLEDISK)
			goto out_remove_file;
		goto out_del;
	}

	/* everything is up and running, commence */
	rcu_assign_pointer(ptbl->part[partno], p);

	/* suppress uevent if the disk suppresses it */
	if (!dev_get_uevent_suppress(ddev))
		kobject_uevent(&pdev->kobj, KOBJ_ADD);
	return p;

out_free_info:
	free_part_info(p);
out_free_stats:
	free_part_stats(p);
out_free:
	kfree(p);
	return ERR_PTR(err);
out_remove_file:
	device_remove_file(pdev, &dev_attr_whole_disk);
out_del:
	kobject_put(p->holder_dir);
	device_del(pdev);
out_put:
	put_device(pdev);
	return ERR_PTR(err);
}

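/*
 * Ask the driver to unlock the device's full native capacity (for
 * example a host protected area) the first time a partition turns out
 * to extend past the current end of disk.  Returns true if the capacity
 * may have changed and the partition scan should be retried.
 */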
static bool disk_unlock_native_capacity(struct gendisk *disk)
{
	const struct block_device_operations *bdops = disk->fops;

	if (bdops->unlock_native_capacity &&
	    !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) {
		printk(KERN_CONT "enabling native capacity\n");
		bdops->unlock_native_capacity(disk);
		disk->flags |= GENHD_FL_NATIVE_CAPACITY;
		return true;
	} else {
		printk(KERN_CONT "truncated\n");
		return false;
	}
}

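/*
 * Invalidate and delete every partition of @disk, typically before a
 * rescan.  Fails with -EBUSY while any partition is held open or
 * carries a mounted superblock.
 */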
int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct disk_part_iter piter;
	struct hd_struct *part;
	int res;

	if (!disk_part_scan_enabled(disk))
		return 0;
	if (bdev->bd_part_count || bdev->bd_super)
		return -EBUSY;
	res = invalidate_partition(disk, 0);
	if (res)
		return res;

	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
	while ((part = disk_part_iter_next(&piter)))
		delete_partition(disk, part->partno);
	disk_part_iter_exit(&piter);

	return 0;
}

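/*
 * Instantiate a single parsed partition, clamping or skipping entries
 * that lie beyond the end of the disk.  Returns false only when the
 * native capacity was unlocked and the whole scan must be restarted.
 */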
static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
		struct parsed_partitions *state, int p)
{
	sector_t size = state->parts[p].size;
	sector_t from = state->parts[p].from;
	struct hd_struct *part;

	if (!size)
		return true;

	if (from >= get_capacity(disk)) {
		printk(KERN_WARNING
		       "%s: p%d start %llu is beyond EOD, ",
		       disk->disk_name, p, (unsigned long long) from);
		if (disk_unlock_native_capacity(disk))
			return false;
		return true;
	}

	if (from + size > get_capacity(disk)) {
		printk(KERN_WARNING
		       "%s: p%d size %llu extends beyond EOD, ",
		       disk->disk_name, p, (unsigned long long) size);

		if (disk_unlock_native_capacity(disk))
			return false;

		/*
		 * We can not ignore partitions of broken tables created by for
		 * example camera firmware, but we limit them to the end of the
		 * disk to avoid creating invalid block devices.
		 */
		size = get_capacity(disk) - from;
	}

	part = add_partition(disk, p, from, size, state->parts[p].flags,
			     &state->parts[p].info);
	if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) {
		printk(KERN_ERR " %s: p%d could not be added: %ld\n",
		       disk->disk_name, p, -PTR_ERR(part));
		return true;
	}

	if (IS_ERR(part))
		return true;

#ifdef CONFIG_BLK_DEV_MD
	if (state->parts[p].flags & ADDPART_FLAG_RAID)
		md_autodetect_dev(part_to_dev(part)->devt);
#endif
	return true;
}

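/*
 * Run the partition parsers on @bdev and register the partitions they
 * found.  Returns 0 on success, -EAGAIN when the scan should be retried
 * (for example after unlocking the native capacity) and -EIO on read
 * errors.
 */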
int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{
	struct parsed_partitions *state;
	int ret = -EAGAIN, p, highest;

	if (!disk_part_scan_enabled(disk))
		return 0;

	state = check_partition(disk, bdev);
	if (!state)
		return 0;
	if (IS_ERR(state)) {
		/*
		 * I/O error reading the partition table.  If we tried to read
		 * beyond EOD, retry after unlocking the native capacity.
		 */
		if (PTR_ERR(state) == -ENOSPC) {
			printk(KERN_WARNING "%s: partition table beyond EOD, ",
			       disk->disk_name);
			if (disk_unlock_native_capacity(disk))
				return -EAGAIN;
		}
		return -EIO;
	}

	/*
	 * Partitions are not supported on host managed zoned block devices.
	 */
	if (disk->queue->limits.zoned == BLK_ZONED_HM) {
		pr_warn("%s: ignoring partition table on host managed zoned block device\n",
			disk->disk_name);
		ret = 0;
		goto out_free_state;
	}

	/*
	 * If we read beyond EOD, try unlocking native capacity even if the
	 * partition table was successfully read as we could be missing some
	 * partitions.
	 */
	if (state->access_beyond_eod) {
		printk(KERN_WARNING
		       "%s: partition table partially beyond EOD, ",
		       disk->disk_name);
		if (disk_unlock_native_capacity(disk))
			goto out_free_state;
	}

	/* tell userspace that the media / partition table may have changed */
	kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);

	/*
	 * Detect the highest partition number and preallocate disk->part_tbl.
	 * This is an optimization and not strictly necessary.
	 */
	for (p = 1, highest = 0; p < state->limit; p++)
		if (state->parts[p].size)
			highest = p;
	disk_expand_part_tbl(disk, highest);

	for (p = 1; p < state->limit; p++)
		if (!blk_add_partition(disk, bdev, state, p))
			goto out_free_state;

	ret = 0;
out_free_state:
	free_partitions(state);
	return ret;
}

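/*
 * Read the page containing 512-byte sector @n of @bdev through the page
 * cache and return a pointer into it; the caller releases the page
 * reference via put_dev_sector().
 */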
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
	struct address_space *mapping = bdev->bd_inode->i_mapping;
	struct page *page;

	page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
	if (!IS_ERR(page)) {
		if (PageError(page))
			goto fail;
		p->v = page;
		return (unsigned char *)page_address(page) +  ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9);
fail:
		put_page(page);
	}
	p->v = NULL;
	return NULL;
}

EXPORT_SYMBOL(read_dev_sector);