// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/libnvdimm.h>
#include <linux/sched/mm.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
#include "pfn.h"

int nvdimm_major;
static int nvdimm_bus_major;
struct class *nd_class;
static DEFINE_IDA(nd_ida);

static int to_nd_device_type(struct device *dev)
{
	if (is_nvdimm(dev))
		return ND_DEVICE_DIMM;
	else if (is_memory(dev))
		return ND_DEVICE_REGION_PMEM;
	else if (is_nd_blk(dev))
		return ND_DEVICE_REGION_BLK;
	else if (is_nd_dax(dev))
		return ND_DEVICE_DAX_PMEM;
	else if (is_nd_region(dev->parent))
		return nd_region_to_nstype(to_nd_region(dev->parent));

	return 0;
}

static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
{
	return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
			to_nd_device_type(dev));
}

static struct module *to_bus_provider(struct device *dev)
{
	/* pin bus providers while regions are enabled */
	if (is_nd_region(dev)) {
		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

		return nvdimm_bus->nd_desc->module;
	}
	return NULL;
}

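/*
 * Count in-flight probes so that userspace-initiated configuration
 * changes can be held off until the bus settles; see
 * wait_nvdimm_bus_probe_idle(). The counter is protected by the bus
 * lock and waiters are woken when it returns to zero.
 */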
static void nvdimm_bus_probe_start(struct nvdimm_bus *nvdimm_bus)
{
	nvdimm_bus_lock(&nvdimm_bus->dev);
	nvdimm_bus->probe_active++;
	nvdimm_bus_unlock(&nvdimm_bus->dev);
}

static void nvdimm_bus_probe_end(struct nvdimm_bus *nvdimm_bus)
{
	nvdimm_bus_lock(&nvdimm_bus->dev);
	if (--nvdimm_bus->probe_active == 0)
		wake_up(&nvdimm_bus->wait);
	nvdimm_bus_unlock(&nvdimm_bus->dev);
}

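/*
 * Bind a driver to an nd-bus device: pin the provider module, bracket
 * ->probe() with the probe_active count, and on success (or
 * -EOPNOTSUPP) let the parent region advance its seed devices.
 */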
static int nvdimm_bus_probe(struct device *dev)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
	struct module *provider = to_bus_provider(dev);
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	int rc;

	if (!try_module_get(provider))
		return -ENXIO;

	dev_dbg(&nvdimm_bus->dev, "START: %s.probe(%s)\n",
			dev->driver->name, dev_name(dev));

	nvdimm_bus_probe_start(nvdimm_bus);
	debug_nvdimm_lock(dev);
	rc = nd_drv->probe(dev);
	debug_nvdimm_unlock(dev);

	if ((rc == 0 || rc == -EOPNOTSUPP) &&
			dev->parent && is_nd_region(dev->parent))
		nd_region_advance_seeds(to_nd_region(dev->parent), dev);
	nvdimm_bus_probe_end(nvdimm_bus);

	dev_dbg(&nvdimm_bus->dev, "END: %s.probe(%s) = %d\n", dev->driver->name,
			dev_name(dev), rc);

	if (rc != 0)
		module_put(provider);
	return rc;
}

static int nvdimm_bus_remove(struct device *dev)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
	struct module *provider = to_bus_provider(dev);
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	int rc = 0;

	if (nd_drv->remove) {
		debug_nvdimm_lock(dev);
		rc = nd_drv->remove(dev);
		debug_nvdimm_unlock(dev);
	}

	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
			dev_name(dev), rc);
	module_put(provider);
	return rc;
}

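/* route bus-level shutdown to the bound driver's optional hook */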
static void nvdimm_bus_shutdown(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	struct nd_device_driver *nd_drv = NULL;

	if (dev->driver)
		nd_drv = to_nd_device_driver(dev->driver);

	if (nd_drv && nd_drv->shutdown) {
		nd_drv->shutdown(dev);
		dev_dbg(&nvdimm_bus->dev, "%s.shutdown(%s)\n",
				dev->driver->name, dev_name(dev));
	}
}

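/*
 * Deliver an nvdimm_event to the driver currently bound to @dev, if
 * any; the device lock keeps the driver from unbinding mid-notify.
 */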
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
	nd_device_lock(dev);
	if (dev->driver) {
		struct nd_device_driver *nd_drv;

		nd_drv = to_nd_device_driver(dev->driver);
		if (nd_drv->notify)
			nd_drv->notify(dev, event);
	}
	nd_device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);

void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);

	if (!nvdimm_bus)
		return;

	/* caller is responsible for holding a reference on the device */
	nd_device_notify(&nd_region->dev, event);
}
EXPORT_SYMBOL_GPL(nvdimm_region_notify);

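/*
 * After a successful clear-error operation, walk every region on the
 * bus and drop the now-clean 512-byte sectors from the region's
 * badblocks list.
 */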
struct clear_badblocks_context {
	resource_size_t phys, cleared;
};

static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
{
	struct clear_badblocks_context *ctx = data;
	struct nd_region *nd_region;
	resource_size_t ndr_end;
	sector_t sector;

	/* make sure device is a region */
	if (!is_memory(dev))
		return 0;

	nd_region = to_nd_region(dev);
	ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;

	/* make sure we are in the region */
	if (ctx->phys < nd_region->ndr_start
			|| (ctx->phys + ctx->cleared) > ndr_end)
		return 0;

	sector = (ctx->phys - nd_region->ndr_start) / 512;
	badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);

	if (nd_region->bb_state)
		sysfs_notify_dirent(nd_region->bb_state);

	return 0;
}

static void nvdimm_clear_badblocks_regions(struct nvdimm_bus *nvdimm_bus,
		phys_addr_t phys, u64 cleared)
{
	struct clear_badblocks_context ctx = {
		.phys = phys,
		.cleared = cleared,
	};

	device_for_each_child(&nvdimm_bus->dev, &ctx,
			nvdimm_clear_badblocks_region);
}

static void nvdimm_account_cleared_poison(struct nvdimm_bus *nvdimm_bus,
		phys_addr_t phys, u64 cleared)
{
	if (cleared > 0)
		badrange_forget(&nvdimm_bus->badrange, phys, cleared);

	if (cleared > 0 && cleared / 512)
		nvdimm_clear_badblocks_regions(nvdimm_bus, phys, cleared);
}

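/**
 * nvdimm_clear_poison() - ask the bus provider to clear media errors
 * @dev: device on the target nvdimm_bus
 * @phys: physical start address of the range to clear
 * @len: length of the range in bytes, aligned to the provider's
 *	clear_err_unit
 *
 * Returns the number of bytes cleared, or a negative errno. The
 * provider callbacks run inside a GFP_NOIO allocation scope since a
 * clear may be triggered from the block I/O path.
 */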
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
		unsigned int len)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
	struct nvdimm_bus_descriptor *nd_desc;
	struct nd_cmd_clear_error clear_err;
	struct nd_cmd_ars_cap ars_cap;
	u32 clear_err_unit, mask;
	unsigned int noio_flag;
	int cmd_rc, rc;

	if (!nvdimm_bus)
		return -ENXIO;

	nd_desc = nvdimm_bus->nd_desc;
	/*
	 * if ndctl does not exist, it's PMEM_LEGACY and
	 * we want to just pretend everything is handled.
	 */
	if (!nd_desc->ndctl)
		return len;

	memset(&ars_cap, 0, sizeof(ars_cap));
	ars_cap.address = phys;
	ars_cap.length = len;
	noio_flag = memalloc_noio_save();
	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
			sizeof(ars_cap), &cmd_rc);
	memalloc_noio_restore(noio_flag);
	if (rc < 0)
		return rc;
	if (cmd_rc < 0)
		return cmd_rc;
	clear_err_unit = ars_cap.clear_err_unit;
	if (!clear_err_unit || !is_power_of_2(clear_err_unit))
		return -ENXIO;

	mask = clear_err_unit - 1;
	if ((phys | len) & mask)
		return -ENXIO;
	memset(&clear_err, 0, sizeof(clear_err));
	clear_err.address = phys;
	clear_err.length = len;
	noio_flag = memalloc_noio_save();
	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
			sizeof(clear_err), &cmd_rc);
	memalloc_noio_restore(noio_flag);
	if (rc < 0)
		return rc;
	if (cmd_rc < 0)
		return cmd_rc;

	nvdimm_account_cleared_poison(nvdimm_bus, phys, clear_err.cleared);

	return clear_err.cleared;
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);

static int nvdimm_bus_match(struct device *dev, struct device_driver *drv);

static struct bus_type nvdimm_bus_type = {
	.name = "nd",
	.uevent = nvdimm_bus_uevent,
	.match = nvdimm_bus_match,
	.probe = nvdimm_bus_probe,
	.remove = nvdimm_bus_remove,
	.shutdown = nvdimm_bus_shutdown,
};

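/*
 * Final put for the bus device; its address doubles as the identity
 * test in is_nvdimm_bus() below.
 */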
static void nvdimm_bus_release(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus;

	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
	ida_simple_remove(&nd_ida, nvdimm_bus->id);
	kfree(nvdimm_bus);
}

bool is_nvdimm_bus(struct device *dev)
{
	return dev->release == nvdimm_bus_release;
}

struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
{
	struct device *dev;

	for (dev = nd_dev; dev; dev = dev->parent)
		if (is_nvdimm_bus(dev))
			break;
	dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n");
	if (dev)
		return to_nvdimm_bus(dev);
	return NULL;
}

struct nvdimm_bus *to_nvdimm_bus(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus;

	nvdimm_bus = container_of(dev, struct nvdimm_bus, dev);
	WARN_ON(!is_nvdimm_bus(dev));
	return nvdimm_bus;
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus);

struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
{
	return to_nvdimm_bus(nvdimm->dev.parent);
}
EXPORT_SYMBOL_GPL(nvdimm_to_bus);

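/**
 * nvdimm_bus_register - register an nvdimm bus with the nd core
 * @parent: provider device
 * @nd_desc: descriptor supplying the ->ndctl() callback, attribute
 *	groups, and owning module
 *
 * Allocates an id from nd_ida and registers the "ndbus%d" device.
 * Returns NULL on failure.
 */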
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
		struct nvdimm_bus_descriptor *nd_desc)
{
	struct nvdimm_bus *nvdimm_bus;
	int rc;

	nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL);
	if (!nvdimm_bus)
		return NULL;
	INIT_LIST_HEAD(&nvdimm_bus->list);
	INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
	init_waitqueue_head(&nvdimm_bus->wait);
	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
	if (nvdimm_bus->id < 0) {
		kfree(nvdimm_bus);
		return NULL;
	}
	mutex_init(&nvdimm_bus->reconfig_mutex);
	badrange_init(&nvdimm_bus->badrange);
	nvdimm_bus->nd_desc = nd_desc;
	nvdimm_bus->dev.parent = parent;
	nvdimm_bus->dev.release = nvdimm_bus_release;
	nvdimm_bus->dev.groups = nd_desc->attr_groups;
	nvdimm_bus->dev.bus = &nvdimm_bus_type;
	nvdimm_bus->dev.of_node = nd_desc->of_node;
	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
	rc = device_register(&nvdimm_bus->dev);
	if (rc) {
		dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
		goto err;
	}

	return nvdimm_bus;
 err:
	put_device(&nvdimm_bus->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_register);

void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;
	device_unregister(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);

static int child_unregister(struct device *dev, void *data)
{
	/*
	 * the singular ndctl class device per bus needs to be
	 * "device_destroy"ed, so skip it here
	 *
	 * i.e. remove classless children
	 */
	if (dev->class)
		return 0;

	if (is_nvdimm(dev)) {
		struct nvdimm *nvdimm = to_nvdimm(dev);
		bool dev_put = false;

		/* We are shutting down. Make state frozen artificially. */
		nvdimm_bus_lock(dev);
		set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
		if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
			dev_put = true;
		nvdimm_bus_unlock(dev);
		cancel_delayed_work_sync(&nvdimm->dwork);
		if (dev_put)
			put_device(dev);
	}
	nd_device_unregister(dev, ND_SYNC);

	return 0;
}

static void free_badrange_list(struct list_head *badrange_list)
{
	struct badrange_entry *bre, *next;

	list_for_each_entry_safe(bre, next, badrange_list, list) {
		list_del(&bre->list);
		kfree(bre);
	}
	list_del_init(badrange_list);
}

static int nd_bus_remove(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);

	mutex_lock(&nvdimm_bus_list_mutex);
	list_del_init(&nvdimm_bus->list);
	mutex_unlock(&nvdimm_bus_list_mutex);

	wait_event(nvdimm_bus->wait,
			atomic_read(&nvdimm_bus->ioctl_active) == 0);

	nd_synchronize();
	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);

	spin_lock(&nvdimm_bus->badrange.lock);
	free_badrange_list(&nvdimm_bus->badrange.list);
	spin_unlock(&nvdimm_bus->badrange.lock);

	nvdimm_bus_destroy_ndctl(nvdimm_bus);

	return 0;
}

static int nd_bus_probe(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
	int rc;

	rc = nvdimm_bus_create_ndctl(nvdimm_bus);
	if (rc)
		return rc;

	mutex_lock(&nvdimm_bus_list_mutex);
	list_add_tail(&nvdimm_bus->list, &nvdimm_bus_list);
	mutex_unlock(&nvdimm_bus_list_mutex);

	/* enable bus provider attributes to look up their local context */
	dev_set_drvdata(dev, nvdimm_bus->nd_desc);

	return 0;
}

static struct nd_device_driver nd_bus_driver = {
	.probe = nd_bus_probe,
	.remove = nd_bus_remove,
	.drv = {
		.name = "nd_bus",
		.suppress_bind_attrs = true,
		.bus = &nvdimm_bus_type,
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
	},
};

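/*
 * The singular nd_bus driver matches the bus device itself; everything
 * else matches by ND_DEVICE_* type bit in the driver's type mask.
 */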
static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
{
	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);

	if (is_nvdimm_bus(dev) && nd_drv == &nd_bus_driver)
		return true;

	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
}

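/*
 * Device registration and unregistration run through an exclusive
 * async domain so that nd_synchronize() can flush all in-flight
 * operations before teardown proceeds.
 */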
static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);

void nd_synchronize(void)
{
	async_synchronize_full_domain(&nd_async_domain);
}
EXPORT_SYMBOL_GPL(nd_synchronize);

static void nd_async_device_register(void *d, async_cookie_t cookie)
{
	struct device *dev = d;

	if (device_add(dev) != 0) {
		dev_err(dev, "%s: failed\n", __func__);
		put_device(dev);
	}
	put_device(dev);
	if (dev->parent)
		put_device(dev->parent);
}

static void nd_async_device_unregister(void *d, async_cookie_t cookie)
{
	struct device *dev = d;

	/* flush bus operations before delete */
	nvdimm_bus_lock(dev);
	nvdimm_bus_unlock(dev);

	device_unregister(dev);
	put_device(dev);
}

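/*
 * Queue a device for asynchronous registration; references taken on
 * the device and its parent here are dropped in
 * nd_async_device_register() once device_add() has run.
 */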
void __nd_device_register(struct device *dev)
{
	if (!dev)
		return;

	/*
	 * Ensure that region devices always have their NUMA node set as
	 * early as possible. This way we are able to make certain that
	 * any memory associated with the creation and the creation
	 * itself of the region is associated with the correct node.
	 */
	if (is_nd_region(dev))
		set_dev_node(dev, to_nd_region(dev)->numa_node);

	dev->bus = &nvdimm_bus_type;
	if (dev->parent) {
		get_device(dev->parent);
		if (dev_to_node(dev) == NUMA_NO_NODE)
			set_dev_node(dev, dev_to_node(dev->parent));
	}
	get_device(dev);

	async_schedule_dev_domain(nd_async_device_register, dev,
				  &nd_async_domain);
}

void nd_device_register(struct device *dev)
{
	device_initialize(dev);
	__nd_device_register(dev);
}
EXPORT_SYMBOL(nd_device_register);

void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
{
	bool killed;

	switch (mode) {
	case ND_ASYNC:
		/*
		 * In the async case this is being triggered with the
		 * device lock held and the unregistration work needs to
		 * be moved out of line iff this thread has won the
		 * race to schedule the deletion.
		 */
		if (!kill_device(dev))
			return;

		get_device(dev);
		async_schedule_domain(nd_async_device_unregister, dev,
				&nd_async_domain);
		break;
	case ND_SYNC:
		/*
		 * In the sync case the device is being unregistered due
		 * to a state change of the parent. Claim the kill state
		 * to synchronize against other unregistration requests,
		 * or otherwise let the async path handle it if the
		 * unregistration was already queued.
		 */
		nd_device_lock(dev);
		killed = kill_device(dev);
		nd_device_unlock(dev);

		if (!killed)
			return;

		nd_synchronize();
		device_unregister(dev);
		break;
	}
}
EXPORT_SYMBOL(nd_device_unregister);

/**
 * __nd_driver_register() - register a region or a namespace driver
 * @nd_drv: driver to register
 * @owner: automatically set by nd_driver_register() macro
 * @mod_name: automatically set by nd_driver_register() macro
 */
int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
		const char *mod_name)
{
	struct device_driver *drv = &nd_drv->drv;

	if (!nd_drv->type) {
		pr_debug("driver type bitmask not set (%ps)\n",
				__builtin_return_address(0));
		return -EINVAL;
	}

	if (!nd_drv->probe) {
		pr_debug("%s ->probe() must be specified\n", mod_name);
		return -EINVAL;
	}

	drv->bus = &nvdimm_bus_type;
	drv->owner = owner;
	drv->mod_name = mod_name;

	return driver_register(drv);
}
EXPORT_SYMBOL(__nd_driver_register);

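/*
 * Propagate the parent region's read-only state to the disk; a disk
 * that is already read-only is left alone.
 */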
int nvdimm_revalidate_disk(struct gendisk *disk)
{
	struct device *dev = disk_to_dev(disk)->parent;
	struct nd_region *nd_region = to_nd_region(dev->parent);
	int disk_ro = get_disk_ro(disk);

	/*
	 * Upgrade to read-only if the region is read-only; preserve
	 * read-only if the disk is already read-only.
	 */
	if (disk_ro || nd_region->ro == disk_ro)
		return 0;

	dev_info(dev, "%s read-only, marking %s read-only\n",
			dev_name(&nd_region->dev), disk->disk_name);
	set_disk_ro(disk, 1);

	return 0;
}
EXPORT_SYMBOL(nvdimm_revalidate_disk);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
			to_nd_device_type(dev));
}
static DEVICE_ATTR_RO(modalias);

static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sprintf(buf, "%s\n", dev->type->name);
}
static DEVICE_ATTR_RO(devtype);

static struct attribute *nd_device_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_devtype.attr,
	NULL,
};

/*
 * nd_device_attribute_group - generic attributes for all devices on an nd bus
 */
struct attribute_group nd_device_attribute_group = {
	.attrs = nd_device_attributes,
};
EXPORT_SYMBOL_GPL(nd_device_attribute_group);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static struct attribute *nd_numa_attributes[] = {
	&dev_attr_numa_node.attr,
	NULL,
};

static umode_t nd_numa_attr_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	if (!IS_ENABLED(CONFIG_NUMA))
		return 0;

	return a->mode;
}

/*
 * nd_numa_attribute_group - NUMA attributes for all devices on an nd bus
 */
struct attribute_group nd_numa_attribute_group = {
	.attrs = nd_numa_attributes,
	.is_visible = nd_numa_attr_visible,
};
EXPORT_SYMBOL_GPL(nd_numa_attribute_group);

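/*
 * Create the "ndctl%d" character device that exposes the bus ioctl
 * interface to userspace, one per registered bus.
 */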
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
{
	dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
	struct device *dev;

	dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
			"ndctl%d", nvdimm_bus->id);

	if (IS_ERR(dev))
		dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
				nvdimm_bus->id, PTR_ERR(dev));
	return PTR_ERR_OR_ZERO(dev);
}

void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
{
	device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
}

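/*
 * Static ioctl payload descriptors indexed by ND_CMD_* number. A
 * UINT_MAX entry marks a variable-length field whose size is read from
 * the command envelope at runtime, see nd_cmd_in_size() and
 * nd_cmd_out_size().
 */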
static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
	[ND_CMD_IMPLEMENTED] = { },
	[ND_CMD_SMART] = {
		.out_num = 2,
		.out_sizes = { 4, 128, },
	},
	[ND_CMD_SMART_THRESHOLD] = {
		.out_num = 2,
		.out_sizes = { 4, 8, },
	},
	[ND_CMD_DIMM_FLAGS] = {
		.out_num = 2,
		.out_sizes = { 4, 4 },
	},
	[ND_CMD_GET_CONFIG_SIZE] = {
		.out_num = 3,
		.out_sizes = { 4, 4, 4, },
	},
	[ND_CMD_GET_CONFIG_DATA] = {
		.in_num = 2,
		.in_sizes = { 4, 4, },
		.out_num = 2,
		.out_sizes = { 4, UINT_MAX, },
	},
	[ND_CMD_SET_CONFIG_DATA] = {
		.in_num = 3,
		.in_sizes = { 4, 4, UINT_MAX, },
		.out_num = 1,
		.out_sizes = { 4, },
	},
	[ND_CMD_VENDOR] = {
		.in_num = 3,
		.in_sizes = { 4, 4, UINT_MAX, },
		.out_num = 3,
		.out_sizes = { 4, 4, UINT_MAX, },
	},
	[ND_CMD_CALL] = {
		.in_num = 2,
		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
		.out_num = 1,
		.out_sizes = { UINT_MAX, },
	},
};

const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
{
	if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
		return &__nd_cmd_dimm_descs[cmd];
	return NULL;
}
EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);

static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
	[ND_CMD_IMPLEMENTED] = { },
	[ND_CMD_ARS_CAP] = {
		.in_num = 2,
		.in_sizes = { 8, 8, },
		.out_num = 4,
		.out_sizes = { 4, 4, 4, 4, },
	},
	[ND_CMD_ARS_START] = {
		.in_num = 5,
		.in_sizes = { 8, 8, 2, 1, 5, },
		.out_num = 2,
		.out_sizes = { 4, 4, },
	},
	[ND_CMD_ARS_STATUS] = {
		.out_num = 3,
		.out_sizes = { 4, 4, UINT_MAX, },
	},
	[ND_CMD_CLEAR_ERROR] = {
		.in_num = 2,
		.in_sizes = { 8, 8, },
		.out_num = 3,
		.out_sizes = { 4, 4, 8, },
	},
	[ND_CMD_CALL] = {
		.in_num = 2,
		.in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, },
		.out_num = 1,
		.out_sizes = { UINT_MAX, },
	},
};

const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
{
	if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
		return &__nd_cmd_bus_descs[cmd];
	return NULL;
}
EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);

u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
		const struct nd_cmd_desc *desc, int idx, void *buf)
{
	if (idx >= desc->in_num)
		return UINT_MAX;

	if (desc->in_sizes[idx] < UINT_MAX)
		return desc->in_sizes[idx];

	if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
		struct nd_cmd_set_config_hdr *hdr = buf;

		return hdr->in_length;
	} else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
		struct nd_cmd_vendor_hdr *hdr = buf;

		return hdr->in_length;
	} else if (cmd == ND_CMD_CALL) {
		struct nd_cmd_pkg *pkg = buf;

		return pkg->nd_size_in;
	}

	return UINT_MAX;
}
EXPORT_SYMBOL_GPL(nd_cmd_in_size);

u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
		const u32 *out_field, unsigned long remainder)
{
	if (idx >= desc->out_num)
		return UINT_MAX;

	if (desc->out_sizes[idx] < UINT_MAX)
		return desc->out_sizes[idx];

	if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
		return in_field[1];
	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
		return out_field[1];
	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) {
		/*
		 * Per table 9-276 ARS Data in ACPI 6.1, out_field[1] is
		 * "Size of Output Buffer in bytes, including this
		 * field."
		 */
		if (out_field[1] < 4)
			return 0;
		/*
		 * ACPI 6.1 is ambiguous if 'status' is included in the
		 * output size. If we encounter an output size that
		 * overshoots the remainder by 4 bytes, assume it was
		 * including 'status'.
		 */
		if (out_field[1] - 4 == remainder)
			return remainder;
		return out_field[1] - 8;
	} else if (cmd == ND_CMD_CALL) {
		struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field;

		return pkg->nd_size_out;
	}

	return UINT_MAX;
}
EXPORT_SYMBOL_GPL(nd_cmd_out_size);

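/*
 * Wait for probe_active to return to zero, dropping and re-taking the
 * device and bus locks so in-flight probes can make progress. Both
 * locks must be held on entry and are held again on return.
 */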
void wait_nvdimm_bus_probe_idle(struct device *dev)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

	do {
		if (nvdimm_bus->probe_active == 0)
			break;
		nvdimm_bus_unlock(dev);
		nd_device_unlock(dev);
		wait_event(nvdimm_bus->wait,
				nvdimm_bus->probe_active == 0);
		nd_device_lock(dev);
		nvdimm_bus_lock(dev);
	} while (true);
}

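/*
 * Return -EBUSY when the range being cleared covers a namespace that
 * is bound to a driver; clear-error requests for active pmem must go
 * through the pmem driver itself (see nd_cmd_clear_to_send()).
 */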
static int nd_pmem_forget_poison_check(struct device *dev, void *data)
{
	struct nd_cmd_clear_error *clear_err =
		(struct nd_cmd_clear_error *)data;
	struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
	struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
	struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL;
	struct nd_namespace_common *ndns = NULL;
	struct nd_namespace_io *nsio;
	resource_size_t offset = 0, end_trunc = 0, start, end, pstart, pend;

	if (nd_dax || !dev->driver)
		return 0;

	start = clear_err->address;
	end = clear_err->address + clear_err->cleared - 1;

	if (nd_btt || nd_pfn || nd_dax) {
		if (nd_btt)
			ndns = nd_btt->ndns;
		else if (nd_pfn)
			ndns = nd_pfn->ndns;
		else if (nd_dax)
			ndns = nd_dax->nd_pfn.ndns;

		if (!ndns)
			return 0;
	} else
		ndns = to_ndns(dev);

	nsio = to_nd_namespace_io(&ndns->dev);
	pstart = nsio->res.start + offset;
	pend = nsio->res.end - end_trunc;

	if ((pstart >= start) && (pend <= end))
		return -EBUSY;

	return 0;
}

static int nd_ns_forget_poison_check(struct device *dev, void *data)
{
	return device_for_each_child(dev, data, nd_pmem_forget_poison_check);
}

/* set_config requires an idle interleave set */
static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
		struct nvdimm *nvdimm, unsigned int cmd, void *data)
{
	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;

	/* ask the bus provider if it would like to block this request */
	if (nd_desc->clear_to_send) {
		int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd, data);

		if (rc)
			return rc;
	}

	/* require clear error to go through the pmem driver */
	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR)
		return device_for_each_child(&nvdimm_bus->dev, data,
				nd_ns_forget_poison_check);

	if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
		return 0;

	/* prevent label manipulation while the kernel owns label updates */
	wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
	if (atomic_read(&nvdimm->busy))
		return -EBUSY;
	return 0;
}

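/*
 * Common ioctl backend for the bus and dimm device nodes: validate the
 * command against the descriptor table and cmd_mask, size the input
 * and output envelopes, bounce the payload through a vmalloc buffer,
 * and dispatch to the provider's ->ndctl() callback under the device
 * and bus locks.
 */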
static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
		int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
	const struct nd_cmd_desc *desc = NULL;
	unsigned int cmd = _IOC_NR(ioctl_cmd);
	struct device *dev = &nvdimm_bus->dev;
	void __user *p = (void __user *) arg;
	char *out_env = NULL, *in_env = NULL;
	const char *cmd_name, *dimm_name;
	u32 in_len = 0, out_len = 0;
	unsigned int func = cmd;
	unsigned long cmd_mask;
	struct nd_cmd_pkg pkg;
	int rc, i, cmd_rc;
	void *buf = NULL;
	u64 buf_len = 0;

	if (nvdimm) {
		desc = nd_cmd_dimm_desc(cmd);
		cmd_name = nvdimm_cmd_name(cmd);
		cmd_mask = nvdimm->cmd_mask;
		dimm_name = dev_name(&nvdimm->dev);
	} else {
		desc = nd_cmd_bus_desc(cmd);
		cmd_name = nvdimm_bus_cmd_name(cmd);
		cmd_mask = nd_desc->cmd_mask;
		dimm_name = "bus";
	}

	if (cmd == ND_CMD_CALL) {
		if (copy_from_user(&pkg, p, sizeof(pkg)))
			return -EFAULT;
	}

	if (!desc || (desc->out_num + desc->in_num == 0) ||
			!test_bit(cmd, &cmd_mask))
		return -ENOTTY;

	/* fail write commands (when read-only) */
	if (read_only)
		switch (cmd) {
		case ND_CMD_VENDOR:
		case ND_CMD_SET_CONFIG_DATA:
		case ND_CMD_ARS_START:
		case ND_CMD_CLEAR_ERROR:
		case ND_CMD_CALL:
			dev_dbg(dev, "'%s' command while read-only.\n",
					nvdimm ? nvdimm_cmd_name(cmd)
					: nvdimm_bus_cmd_name(cmd));
			return -EPERM;
		default:
			break;
		}

	/* process an input envelope */
	in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
	if (!in_env)
		return -ENOMEM;
	for (i = 0; i < desc->in_num; i++) {
		u32 in_size, copy;

		in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
		if (in_size == UINT_MAX) {
			dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
					__func__, dimm_name, cmd_name, i);
			rc = -ENXIO;
			goto out;
		}
		if (in_len < ND_CMD_MAX_ENVELOPE)
			copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
		else
			copy = 0;
		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
			rc = -EFAULT;
			goto out;
		}
		in_len += in_size;
	}

	if (cmd == ND_CMD_CALL) {
		func = pkg.nd_command;
		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
				dimm_name, pkg.nd_command,
				in_len, out_len, buf_len);
	}

	/* process an output envelope */
	out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
	if (!out_env) {
		rc = -ENOMEM;
		goto out;
	}

	for (i = 0; i < desc->out_num; i++) {
		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
				(u32 *) in_env, (u32 *) out_env, 0);
		u32 copy;

		if (out_size == UINT_MAX) {
			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
					dimm_name, cmd_name, i);
			rc = -EFAULT;
			goto out;
		}
		if (out_len < ND_CMD_MAX_ENVELOPE)
			copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
		else
			copy = 0;
		if (copy && copy_from_user(&out_env[out_len],
					p + in_len + out_len, copy)) {
			rc = -EFAULT;
			goto out;
		}
		out_len += out_size;
	}

	buf_len = (u64) out_len + (u64) in_len;
	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
		rc = -EINVAL;
		goto out;
	}

	buf = vmalloc(buf_len);
	if (!buf) {
		rc = -ENOMEM;
		goto out;
	}

	if (copy_from_user(buf, p, buf_len)) {
		rc = -EFAULT;
		goto out;
	}

	nd_device_lock(dev);
	nvdimm_bus_lock(dev);
	rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
	if (rc)
		goto out_unlock;

	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, &cmd_rc);
	if (rc < 0)
		goto out_unlock;

	if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR && cmd_rc >= 0) {
		struct nd_cmd_clear_error *clear_err = buf;

		nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
				clear_err->cleared);
	}

	if (copy_to_user(p, buf, buf_len))
		rc = -EFAULT;

out_unlock:
	nvdimm_bus_unlock(dev);
	nd_device_unlock(dev);
out:
	kfree(in_env);
	kfree(out_env);
	vfree(buf);
	return rc;
}

enum nd_ioctl_mode {
	BUS_IOCTL,
	DIMM_IOCTL,
};

static int match_dimm(struct device *dev, void *data)
{
	long id = (long) data;

	if (is_nvdimm(dev)) {
		struct nvdimm *nvdimm = to_nvdimm(dev);

		return nvdimm->id == id;
	}

	return 0;
}

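/*
 * Resolve the chardev minor stashed in file->private_data to a bus or
 * dimm under the bus-list lock, then bump ioctl_active so that bus
 * teardown in nd_bus_remove() waits for this ioctl to finish.
 */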
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
		enum nd_ioctl_mode mode)
{
	struct nvdimm_bus *nvdimm_bus, *found = NULL;
	long id = (long) file->private_data;
	struct nvdimm *nvdimm = NULL;
	int rc, ro;

	ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
	mutex_lock(&nvdimm_bus_list_mutex);
	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
		if (mode == DIMM_IOCTL) {
			struct device *dev;

			dev = device_find_child(&nvdimm_bus->dev,
					file->private_data, match_dimm);
			if (!dev)
				continue;
			nvdimm = to_nvdimm(dev);
			found = nvdimm_bus;
		} else if (nvdimm_bus->id == id) {
			found = nvdimm_bus;
		}

		if (found) {
			atomic_inc(&nvdimm_bus->ioctl_active);
			break;
		}
	}
	mutex_unlock(&nvdimm_bus_list_mutex);

	if (!found)
		return -ENXIO;

	nvdimm_bus = found;
	rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);

	if (nvdimm)
		put_device(&nvdimm->dev);
	if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
		wake_up(&nvdimm_bus->wait);

	return rc;
}

static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return nd_ioctl(file, cmd, arg, BUS_IOCTL);
}

static long dimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	return nd_ioctl(file, cmd, arg, DIMM_IOCTL);
}

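/* stash the minor number for nd_ioctl() to map back to a bus or dimm */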
static int nd_open(struct inode *inode, struct file *file)
{
	long minor = iminor(inode);

	file->private_data = (void *) minor;
	return 0;
}

static const struct file_operations nvdimm_bus_fops = {
	.owner = THIS_MODULE,
	.open = nd_open,
	.unlocked_ioctl = bus_ioctl,
	.compat_ioctl = bus_ioctl,
	.llseek = noop_llseek,
};

static const struct file_operations nvdimm_fops = {
	.owner = THIS_MODULE,
	.open = nd_open,
	.unlocked_ioctl = dimm_ioctl,
	.compat_ioctl = dimm_ioctl,
	.llseek = noop_llseek,
};

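/*
 * Register the bus type, the dynamic chrdev majors for "ndctl" and
 * "dimmctl", the "nd" class, and finally the nd_bus driver that binds
 * to each registered nvdimm_bus.
 */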
int __init nvdimm_bus_init(void)
{
	int rc;

	rc = bus_register(&nvdimm_bus_type);
	if (rc)
		return rc;

	rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
	if (rc < 0)
		goto err_bus_chrdev;
	nvdimm_bus_major = rc;

	rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
	if (rc < 0)
		goto err_dimm_chrdev;
	nvdimm_major = rc;

	nd_class = class_create(THIS_MODULE, "nd");
	if (IS_ERR(nd_class)) {
		rc = PTR_ERR(nd_class);
		goto err_class;
	}

	rc = driver_register(&nd_bus_driver.drv);
	if (rc)
		goto err_nd_bus;

	return 0;

 err_nd_bus:
	class_destroy(nd_class);
 err_class: