// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
 */
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/hash.h>
#include <linux/sort.h>
#include <linux/io.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"

/*
 * For readq() and writeq() on 32-bit builds, the hi-lo, lo-hi order is
 * irrelevant.
 */
#include <linux/io-64-nonatomic-hi-lo.h>

static DEFINE_IDA(region_ida);
static DEFINE_PER_CPU(int, flush_idx);

static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
		struct nd_region_data *ndrd)
{
	int i, j;

	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
	for (i = 0; i < (1 << ndrd->hints_shift); i++) {
		struct resource *res = &nvdimm->flush_wpq[i];
		unsigned long pfn = PHYS_PFN(res->start);
		void __iomem *flush_page;

		/* check if flush hints share a page */
		for (j = 0; j < i; j++) {
			struct resource *res_j = &nvdimm->flush_wpq[j];
			unsigned long pfn_j = PHYS_PFN(res_j->start);

			if (pfn == pfn_j)
				break;
		}

		if (j < i)
			flush_page = (void __iomem *) ((unsigned long)
					ndrd_get_flush_wpq(ndrd, dimm, j)
					& PAGE_MASK);
		else
			flush_page = devm_nvdimm_ioremap(dev,
					PFN_PHYS(pfn), PAGE_SIZE);
		if (!flush_page)
			return -ENXIO;
		ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
				+ (res->start & ~PAGE_MASK));
	}

	return 0;
}

int nd_region_activate(struct nd_region *nd_region)
{
	int i, j, num_flush = 0;
	struct nd_region_data *ndrd;
	struct device *dev = &nd_region->dev;
	size_t flush_data_size = sizeof(void *);

	nvdimm_bus_lock(&nd_region->dev);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		if (test_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags)) {
			nvdimm_bus_unlock(&nd_region->dev);
			return -EBUSY;
		}

		/* at least one null hint slot per-dimm for the "no-hint" case */
		flush_data_size += sizeof(void *);
		num_flush = min_not_zero(num_flush, nvdimm->num_flush);
		if (!nvdimm->num_flush)
			continue;
		flush_data_size += nvdimm->num_flush * sizeof(void *);
	}
	nvdimm_bus_unlock(&nd_region->dev);

	ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL);
	if (!ndrd)
		return -ENOMEM;
	dev_set_drvdata(dev, ndrd);

	if (!num_flush)
		return 0;

	ndrd->hints_shift = ilog2(num_flush);
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;
		int rc = nvdimm_map_flush(&nd_region->dev, nvdimm, i, ndrd);

		if (rc)
			return rc;
	}

	/*
	 * Clear out entries that are duplicates. This should prevent
	 * unnecessary extra flushes.
	 */
	for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
		/* ignore if NULL already */
		if (!ndrd_get_flush_wpq(ndrd, i, 0))
			continue;

		for (j = i + 1; j < nd_region->ndr_mappings; j++)
			if (ndrd_get_flush_wpq(ndrd, i, 0) ==
			    ndrd_get_flush_wpq(ndrd, j, 0))
				ndrd_set_flush_wpq(ndrd, j, 0, NULL);
	}

	return 0;
}

static void nd_region_release(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev);
	u16 i;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		put_device(&nvdimm->dev);
	}
	free_percpu(nd_region->lane);
	ida_simple_remove(&region_ida, nd_region->id);
	if (is_nd_blk(dev))
		kfree(to_nd_blk_region(dev));
	else
		kfree(nd_region);
}

static struct device_type nd_blk_device_type = {
	.name = "nd_blk",
	.release = nd_region_release,
};

static struct device_type nd_pmem_device_type = {
	.name = "nd_pmem",
	.release = nd_region_release,
};

static struct device_type nd_volatile_device_type = {
	.name = "nd_volatile",
	.release = nd_region_release,
};

bool is_nd_pmem(struct device *dev)
{
	return dev ? dev->type == &nd_pmem_device_type : false;
}

bool is_nd_blk(struct device *dev)
{
	return dev ? dev->type == &nd_blk_device_type : false;
}

bool is_nd_volatile(struct device *dev)
{
	return dev ? dev->type == &nd_volatile_device_type : false;
}

struct nd_region *to_nd_region(struct device *dev)
{
	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);

	WARN_ON(dev->type->release != nd_region_release);
	return nd_region;
}
EXPORT_SYMBOL_GPL(to_nd_region);

struct device *nd_region_dev(struct nd_region *nd_region)
{
	if (!nd_region)
		return NULL;
	return &nd_region->dev;
}
EXPORT_SYMBOL_GPL(nd_region_dev);

struct nd_blk_region *to_nd_blk_region(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev);

	WARN_ON(!is_nd_blk(dev));
	return container_of(nd_region, struct nd_blk_region, nd_region);
}
EXPORT_SYMBOL_GPL(to_nd_blk_region);

void *nd_region_provider_data(struct nd_region *nd_region)
{
	return nd_region->provider_data;
}
EXPORT_SYMBOL_GPL(nd_region_provider_data);

void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
{
	return ndbr->blk_provider_data;
}
EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);

void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
{
	ndbr->blk_provider_data = data;
}
EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);

/**
 * nd_region_to_nstype() - region to an integer namespace type
 * @nd_region: region-device to interrogate
 *
 * This is the 'nstype' attribute of a region as well, an input to the
 * MODALIAS for namespace devices, and the bit number used by an nvdimm_bus
 * to match namespace devices with namespace drivers.
 */
int nd_region_to_nstype(struct nd_region *nd_region)
{
	if (is_memory(&nd_region->dev)) {
		u16 i, alias;

		for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
			struct nvdimm *nvdimm = nd_mapping->nvdimm;

			if (test_bit(NDD_ALIASING, &nvdimm->flags))
				alias++;
		}
		if (alias)
			return ND_DEVICE_NAMESPACE_PMEM;
		else
			return ND_DEVICE_NAMESPACE_IO;
	} else if (is_nd_blk(&nd_region->dev)) {
		return ND_DEVICE_NAMESPACE_BLK;
	}

	return 0;
}
EXPORT_SYMBOL(nd_region_to_nstype);
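/*
 * Illustrative sketch (not part of this file): since nstype doubles as a
 * bit number, a hypothetical namespace driver can advertise the types it
 * handles as a mask, and matching reduces to a test_bit(), e.g.:
 *
 *	unsigned long type_mask = BIT(ND_DEVICE_NAMESPACE_IO)
 *		| BIT(ND_DEVICE_NAMESPACE_PMEM);
 *
 *	if (test_bit(nd_region_to_nstype(nd_region), &type_mask))
 *		... driver claims namespaces of this region ...
 */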

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long size = 0;

	if (is_memory(dev)) {
		size = nd_region->ndr_size;
	} else if (nd_region->ndr_mappings == 1) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[0];

		size = nd_mapping->size;
	}

	return sprintf(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(size);

static ssize_t deep_flush_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	/*
	 * NOTE: in the nvdimm_has_flush() error case this attribute is
	 * not visible.
	 */
	return sprintf(buf, "%d\n", nvdimm_has_flush(nd_region));
}

static ssize_t deep_flush_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	bool flush;
	int rc = strtobool(buf, &flush);
	struct nd_region *nd_region = to_nd_region(dev);

	if (rc)
		return rc;
	if (!flush)
		return -EINVAL;
	rc = nvdimm_flush(nd_region, NULL);
	if (rc)
		return rc;

	return len;
}
static DEVICE_ATTR_RW(deep_flush);

static ssize_t mappings_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region->ndr_mappings);
}
static DEVICE_ATTR_RO(mappings);

static ssize_t nstype_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
}
static DEVICE_ATTR_RO(nstype);

static ssize_t set_cookie_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_interleave_set *nd_set = nd_region->nd_set;
	ssize_t rc = 0;

	if (is_memory(dev) && nd_set)
		/* pass, should be precluded by region_visible */;
	else
		return -ENXIO;

	/*
	 * The cookie to show depends on which specification of the
	 * labels we are using. If there are no labels then default to
	 * the v1.1 namespace label cookie definition. To read all this
	 * data we need to wait for probing to settle.
	 */
	nd_device_lock(dev);
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	if (nd_region->ndr_mappings) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[0];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		if (ndd) {
			struct nd_namespace_index *nsindex;

			nsindex = to_namespace_index(ndd, ndd->ns_current);
			rc = sprintf(buf, "%#llx\n",
					nd_region_interleave_set_cookie(nd_region,
						nsindex));
		}
	}
	nvdimm_bus_unlock(dev);
	nd_device_unlock(dev);

	if (rc)
		return rc;
	return sprintf(buf, "%#llx\n", nd_set->cookie1);
}
static DEVICE_ATTR_RO(set_cookie);

resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
{
	resource_size_t blk_max_overlap = 0, available, overlap;
	int i;

	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));

 retry:
	available = 0;
	overlap = blk_max_overlap;
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);

		/* if a dimm is disabled the available capacity is zero */
		if (!ndd)
			return 0;

		if (is_memory(&nd_region->dev)) {
			available += nd_pmem_available_dpa(nd_region,
					nd_mapping, &overlap);
			if (overlap > blk_max_overlap) {
				blk_max_overlap = overlap;
				goto retry;
			}
		} else if (is_nd_blk(&nd_region->dev))
			available += nd_blk_available_dpa(nd_region);
	}

	return available;
}

resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
{
	resource_size_t available = 0;
	int i;

	if (is_memory(&nd_region->dev))
		available = PHYS_ADDR_MAX;

	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];

		if (is_memory(&nd_region->dev))
			available = min(available,
					nd_pmem_max_contiguous_dpa(nd_region,
								   nd_mapping));
		else if (is_nd_blk(&nd_region->dev))
			available += nd_blk_available_dpa(nd_region);
	}
	if (is_memory(&nd_region->dev))
		return available * nd_region->ndr_mappings;
	return available;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long available = 0;

	/*
	 * Flush in-flight updates and grab a snapshot of the available
	 * size.  Of course, this value is potentially invalidated the
	 * moment nvdimm_bus_lock() is dropped, but that's userspace's
	 * problem to not race itself.
	 */
	nd_device_lock(dev);
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	available = nd_region_available_dpa(nd_region);
	nvdimm_bus_unlock(dev);
	nd_device_unlock(dev);

	return sprintf(buf, "%llu\n", available);
}
static DEVICE_ATTR_RO(available_size);

static ssize_t max_available_extent_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	unsigned long long available = 0;

	nd_device_lock(dev);
	nvdimm_bus_lock(dev);
	wait_nvdimm_bus_probe_idle(dev);
	available = nd_region_allocatable_dpa(nd_region);
	nvdimm_bus_unlock(dev);
	nd_device_unlock(dev);

	return sprintf(buf, "%llu\n", available);
}
static DEVICE_ATTR_RO(max_available_extent);

static ssize_t init_namespaces_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region_data *ndrd = dev_get_drvdata(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (ndrd)
		rc = sprintf(buf, "%d/%d\n", ndrd->ns_active, ndrd->ns_count);
	else
		rc = -ENXIO;
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(init_namespaces);

static ssize_t namespace_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->ns_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);
	return rc;
}
static DEVICE_ATTR_RO(namespace_seed);

static ssize_t btt_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->btt_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->btt_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(btt_seed);

static ssize_t pfn_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->pfn_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(pfn_seed);

static ssize_t dax_seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nvdimm_bus_lock(dev);
	if (nd_region->dax_seed)
		rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed));
	else
		rc = sprintf(buf, "\n");
	nvdimm_bus_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(dax_seed);

static ssize_t read_only_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%d\n", nd_region->ro);
}

static ssize_t read_only_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	bool ro;
	int rc = strtobool(buf, &ro);
	struct nd_region *nd_region = to_nd_region(dev);

	if (rc)
		return rc;

	nd_region->ro = ro;
	return len;
}
static DEVICE_ATTR_RW(read_only);

static ssize_t region_badblocks_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);
	ssize_t rc;

	nd_device_lock(dev);
	if (dev->driver)
		rc = badblocks_show(&nd_region->bb, buf, 0);
	else
		rc = -ENXIO;
	nd_device_unlock(dev);

	return rc;
}
static DEVICE_ATTR(badblocks, 0444, region_badblocks_show, NULL);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	return sprintf(buf, "%#llx\n", nd_region->ndr_start);
}
static DEVICE_ATTR_RO(resource);

static ssize_t persistence_domain_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_region *nd_region = to_nd_region(dev);

	if (test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags))
		return sprintf(buf, "cpu_cache\n");
	else if (test_bit(ND_REGION_PERSIST_MEMCTRL, &nd_region->flags))
		return sprintf(buf, "memory_controller\n");
	else
		return sprintf(buf, "\n");
}
static DEVICE_ATTR_RO(persistence_domain);

static struct attribute *nd_region_attributes[] = {
	&dev_attr_size.attr,
	&dev_attr_nstype.attr,
	&dev_attr_mappings.attr,
	&dev_attr_btt_seed.attr,
	&dev_attr_pfn_seed.attr,
	&dev_attr_dax_seed.attr,
	&dev_attr_deep_flush.attr,
	&dev_attr_read_only.attr,
	&dev_attr_set_cookie.attr,
	&dev_attr_available_size.attr,
	&dev_attr_max_available_extent.attr,
	&dev_attr_namespace_seed.attr,
	&dev_attr_init_namespaces.attr,
	&dev_attr_badblocks.attr,
	&dev_attr_resource.attr,
	&dev_attr_persistence_domain.attr,
	NULL,
};

static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, typeof(*dev), kobj);
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_interleave_set *nd_set = nd_region->nd_set;
	int type = nd_region_to_nstype(nd_region);

	if (!is_memory(dev) && a == &dev_attr_pfn_seed.attr)
		return 0;

	if (!is_memory(dev) && a == &dev_attr_dax_seed.attr)
		return 0;

	if (!is_memory(dev) && a == &dev_attr_badblocks.attr)
		return 0;

	if (a == &dev_attr_resource.attr) {
		if (is_memory(dev))
			return 0400;
		else
			return 0;
	}

	if (a == &dev_attr_deep_flush.attr) {
		int has_flush = nvdimm_has_flush(nd_region);

		if (has_flush == 1)
			return a->mode;
		else if (has_flush == 0)
			return 0444;
		else
			return 0;
	}

	if (a == &dev_attr_persistence_domain.attr) {
		if ((nd_region->flags & (BIT(ND_REGION_PERSIST_CACHE)
					| BIT(ND_REGION_PERSIST_MEMCTRL))) == 0)
			return 0;
		return a->mode;
	}

	if (a != &dev_attr_set_cookie.attr
			&& a != &dev_attr_available_size.attr)
		return a->mode;

	if ((type == ND_DEVICE_NAMESPACE_PMEM
				|| type == ND_DEVICE_NAMESPACE_BLK)
			&& a == &dev_attr_available_size.attr)
		return a->mode;
	else if (is_memory(dev) && nd_set)
		return a->mode;

	return 0;
}

struct attribute_group nd_region_attribute_group = {
	.attrs = nd_region_attributes,
	.is_visible = region_visible,
};
EXPORT_SYMBOL_GPL(nd_region_attribute_group);

u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
		struct nd_namespace_index *nsindex)
{
	struct nd_interleave_set *nd_set = nd_region->nd_set;

	if (!nd_set)
		return 0;

	if (nsindex && __le16_to_cpu(nsindex->major) == 1
			&& __le16_to_cpu(nsindex->minor) == 1)
		return nd_set->cookie1;
	return nd_set->cookie2;
}

u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region)
{
	struct nd_interleave_set *nd_set = nd_region->nd_set;

	if (nd_set)
		return nd_set->altcookie;
	return 0;
}

void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
{
	struct nd_label_ent *label_ent, *e;

	lockdep_assert_held(&nd_mapping->lock);
	list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
		list_del(&label_ent->list);
		kfree(label_ent);
	}
}

/*
 * When a namespace is activated, create new seeds for the next
 * namespace, or namespace-personality, to be configured.
 */
void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
{
	nvdimm_bus_lock(dev);
	if (nd_region->ns_seed == dev) {
		nd_region_create_ns_seed(nd_region);
	} else if (is_nd_btt(dev)) {
		struct nd_btt *nd_btt = to_nd_btt(dev);

		if (nd_region->btt_seed == dev)
			nd_region_create_btt_seed(nd_region);
		if (nd_region->ns_seed == &nd_btt->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);

		if (nd_region->pfn_seed == dev)
			nd_region_create_pfn_seed(nd_region);
		if (nd_region->ns_seed == &nd_pfn->ndns->dev)
			nd_region_create_ns_seed(nd_region);
	} else if (is_nd_dax(dev)) {
		struct nd_dax *nd_dax = to_nd_dax(dev);

		if (nd_region->dax_seed == dev)
			nd_region_create_dax_seed(nd_region);
		if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
			nd_region_create_ns_seed(nd_region);
	}
	nvdimm_bus_unlock(dev);
}

static ssize_t mappingN(struct device *dev, char *buf, int n)
{
	struct nd_region *nd_region = to_nd_region(dev);
	struct nd_mapping *nd_mapping;
	struct nvdimm *nvdimm;

	if (n >= nd_region->ndr_mappings)
		return -ENXIO;
	nd_mapping = &nd_region->mapping[n];
	nvdimm = nd_mapping->nvdimm;

	return sprintf(buf, "%s,%llu,%llu,%d\n", dev_name(&nvdimm->dev),
			nd_mapping->start, nd_mapping->size,
			nd_mapping->position);
}

#define REGION_MAPPING(idx) \
static ssize_t mapping##idx##_show(struct device *dev,		\
		struct device_attribute *attr, char *buf)	\
{								\
	return mappingN(dev, buf, idx);				\
}								\
static DEVICE_ATTR_RO(mapping##idx)

/*
 * 32 should be enough for a while; even in the presence of socket
 * interleave, a 32-way interleave set is a degenerate case.
 */
REGION_MAPPING(0);
REGION_MAPPING(1);
REGION_MAPPING(2);
REGION_MAPPING(3);
REGION_MAPPING(4);
REGION_MAPPING(5);
REGION_MAPPING(6);
REGION_MAPPING(7);
REGION_MAPPING(8);
REGION_MAPPING(9);
REGION_MAPPING(10);
REGION_MAPPING(11);
REGION_MAPPING(12);
REGION_MAPPING(13);
REGION_MAPPING(14);
REGION_MAPPING(15);
REGION_MAPPING(16);
REGION_MAPPING(17);
REGION_MAPPING(18);
REGION_MAPPING(19);
REGION_MAPPING(20);
REGION_MAPPING(21);
REGION_MAPPING(22);
REGION_MAPPING(23);
REGION_MAPPING(24);
REGION_MAPPING(25);
REGION_MAPPING(26);
REGION_MAPPING(27);
REGION_MAPPING(28);
REGION_MAPPING(29);
REGION_MAPPING(30);
REGION_MAPPING(31);

static umode_t mapping_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct nd_region *nd_region = to_nd_region(dev);

	if (n < nd_region->ndr_mappings)
		return a->mode;
	return 0;
}

static struct attribute *mapping_attributes[] = {
	&dev_attr_mapping0.attr,
	&dev_attr_mapping1.attr,
	&dev_attr_mapping2.attr,
	&dev_attr_mapping3.attr,
	&dev_attr_mapping4.attr,
	&dev_attr_mapping5.attr,
	&dev_attr_mapping6.attr,
	&dev_attr_mapping7.attr,
	&dev_attr_mapping8.attr,
	&dev_attr_mapping9.attr,
	&dev_attr_mapping10.attr,
	&dev_attr_mapping11.attr,
	&dev_attr_mapping12.attr,
	&dev_attr_mapping13.attr,
	&dev_attr_mapping14.attr,
	&dev_attr_mapping15.attr,
	&dev_attr_mapping16.attr,
	&dev_attr_mapping17.attr,
	&dev_attr_mapping18.attr,
	&dev_attr_mapping19.attr,
	&dev_attr_mapping20.attr,
	&dev_attr_mapping21.attr,
	&dev_attr_mapping22.attr,
	&dev_attr_mapping23.attr,
	&dev_attr_mapping24.attr,
	&dev_attr_mapping25.attr,
	&dev_attr_mapping26.attr,
	&dev_attr_mapping27.attr,
	&dev_attr_mapping28.attr,
	&dev_attr_mapping29.attr,
	&dev_attr_mapping30.attr,
	&dev_attr_mapping31.attr,
	NULL,
};

struct attribute_group nd_mapping_attribute_group = {
	.is_visible = mapping_visible,
	.attrs = mapping_attributes,
};
EXPORT_SYMBOL_GPL(nd_mapping_attribute_group);

int nd_blk_region_init(struct nd_region *nd_region)
{
	struct device *dev = &nd_region->dev;
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);

	if (!is_nd_blk(dev))
		return 0;

	if (nd_region->ndr_mappings < 1) {
		dev_dbg(dev, "invalid BLK region\n");
		return -ENXIO;
	}

	return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
}

/**
 * nd_region_acquire_lane - allocate and lock a lane
 * @nd_region: region id and number of lanes possible
 *
 * A lane correlates to a BLK-data-window and/or a log slot in the BTT.
 * We optimize for the common case where there are 256 lanes, one
 * per-cpu.  For larger systems we need to lock to share lanes.  For now
 * this implementation assumes the cost of maintaining an allocator for
 * free lanes is on the order of the lock hold time, so it implements a
 * static lane = cpu % num_lanes mapping.
 *
 * In the case of a BTT instance on top of a BLK namespace a lane may be
 * acquired recursively.  We lock on the first instance.
 *
 * In the case of a BTT instance on top of PMEM, we only acquire a lane
 * for the BTT metadata updates.
 */
unsigned int nd_region_acquire_lane(struct nd_region *nd_region)
{
	unsigned int cpu, lane;

	cpu = get_cpu();
	if (nd_region->num_lanes < nr_cpu_ids) {
		struct nd_percpu_lane *ndl_lock, *ndl_count;

		lane = cpu % nd_region->num_lanes;
		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
		if (ndl_count->count++ == 0)
			spin_lock(&ndl_lock->lock);
	} else
		lane = cpu;

	return lane;
}
EXPORT_SYMBOL(nd_region_acquire_lane);

void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
{
	if (nd_region->num_lanes < nr_cpu_ids) {
		unsigned int cpu = get_cpu();
		struct nd_percpu_lane *ndl_lock, *ndl_count;

		ndl_count = per_cpu_ptr(nd_region->lane, cpu);
		ndl_lock = per_cpu_ptr(nd_region->lane, lane);
		if (--ndl_count->count == 0)
			spin_unlock(&ndl_lock->lock);
		put_cpu();
	} else
		put_cpu();
}
EXPORT_SYMBOL(nd_region_release_lane);
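/*
 * Illustrative sketch (not part of this file): a BTT/BLK-style I/O path is
 * expected to bracket each media access with a lane, for example:
 *
 *	unsigned int lane = nd_region_acquire_lane(nd_region);
 *
 *	... perform I/O through the per-lane resource (data window / log slot) ...
 *
 *	nd_region_release_lane(nd_region, lane);
 *
 * Acquire disables preemption (get_cpu()), so the acquire/release pair must
 * stay balanced and short-lived on the same cpu.
 */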

static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc, struct device_type *dev_type,
		const char *caller)
{
	struct nd_region *nd_region;
	struct device *dev;
	void *region_buf;
	unsigned int i;
	int ro = 0;

	for (i = 0; i < ndr_desc->num_mappings; i++) {
		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
		struct nvdimm *nvdimm = mapping->nvdimm;

		if ((mapping->start | mapping->size) % PAGE_SIZE) {
			dev_err(&nvdimm_bus->dev,
				"%s: %s mapping%d is not %ld aligned\n",
				caller, dev_name(&nvdimm->dev), i, PAGE_SIZE);
			return NULL;
		}

		if (test_bit(NDD_UNARMED, &nvdimm->flags))
			ro = 1;

		if (test_bit(NDD_NOBLK, &nvdimm->flags)
				&& dev_type == &nd_blk_device_type) {
			dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
					caller, dev_name(&nvdimm->dev), i);
			return NULL;
		}
	}

	if (dev_type == &nd_blk_device_type) {
		struct nd_blk_region_desc *ndbr_desc;
		struct nd_blk_region *ndbr;

		ndbr_desc = to_blk_region_desc(ndr_desc);
		ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
				* ndr_desc->num_mappings,
				GFP_KERNEL);
		if (ndbr) {
			nd_region = &ndbr->nd_region;
			ndbr->enable = ndbr_desc->enable;
			ndbr->do_io = ndbr_desc->do_io;
		}
		region_buf = ndbr;
	} else {
		nd_region = kzalloc(struct_size(nd_region, mapping,
						ndr_desc->num_mappings),
				    GFP_KERNEL);
		region_buf = nd_region;
	}

	if (!region_buf)
		return NULL;
	nd_region->id = ida_simple_get(&region_ida, 0, 0, GFP_KERNEL);
	if (nd_region->id < 0)
		goto err_id;

	nd_region->lane = alloc_percpu(struct nd_percpu_lane);
	if (!nd_region->lane)
		goto err_percpu;

        for (i = 0; i < nr_cpu_ids; i++) {
		struct nd_percpu_lane *ndl;

		ndl = per_cpu_ptr(nd_region->lane, i);
		spin_lock_init(&ndl->lock);
		ndl->count = 0;
	}

	for (i = 0; i < ndr_desc->num_mappings; i++) {
		struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
		struct nvdimm *nvdimm = mapping->nvdimm;

		nd_region->mapping[i].nvdimm = nvdimm;
		nd_region->mapping[i].start = mapping->start;
		nd_region->mapping[i].size = mapping->size;
		nd_region->mapping[i].position = mapping->position;
		INIT_LIST_HEAD(&nd_region->mapping[i].labels);
		mutex_init(&nd_region->mapping[i].lock);

		get_device(&nvdimm->dev);
	}
	nd_region->ndr_mappings = ndr_desc->num_mappings;
	nd_region->provider_data = ndr_desc->provider_data;
	nd_region->nd_set = ndr_desc->nd_set;
	nd_region->num_lanes = ndr_desc->num_lanes;
	nd_region->flags = ndr_desc->flags;
	nd_region->ro = ro;
	nd_region->numa_node = ndr_desc->numa_node;
	nd_region->target_node = ndr_desc->target_node;
	ida_init(&nd_region->ns_ida);
	ida_init(&nd_region->btt_ida);
	ida_init(&nd_region->pfn_ida);
	ida_init(&nd_region->dax_ida);
	dev = &nd_region->dev;
	dev_set_name(dev, "region%d", nd_region->id);
	dev->parent = &nvdimm_bus->dev;
	dev->type = dev_type;
	dev->groups = ndr_desc->attr_groups;
	dev->of_node = ndr_desc->of_node;
	nd_region->ndr_size = resource_size(ndr_desc->res);
	nd_region->ndr_start = ndr_desc->res->start;
	if (ndr_desc->flush)
		nd_region->flush = ndr_desc->flush;
	else
		nd_region->flush = NULL;

	nd_device_register(dev);

	return nd_region;

 err_percpu:
	ida_simple_remove(&region_ida, nd_region->id);
 err_id:
	kfree(region_buf);
	return NULL;
}

struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc)
{
	ndr_desc->num_lanes = ND_MAX_LANES;
	return nd_region_create(nvdimm_bus, ndr_desc, &nd_pmem_device_type,
			__func__);
}
EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);

struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc)
{
	if (ndr_desc->num_mappings > 1)
		return NULL;
	ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
	return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
			__func__);
}
EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);

struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
		struct nd_region_desc *ndr_desc)
{
	ndr_desc->num_lanes = ND_MAX_LANES;
	return nd_region_create(nvdimm_bus, ndr_desc, &nd_volatile_device_type,
			__func__);
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);

int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
	int rc = 0;

	if (!nd_region->flush)
		rc = generic_nvdimm_flush(nd_region);
	else {
		if (nd_region->flush(nd_region, bio))
			rc = -EIO;
	}

	return rc;
}
/**
 * generic_nvdimm_flush - flush any posted write queues between the cpu and pmem media
 * @nd_region: blk or interleaved pmem region
 */
int generic_nvdimm_flush(struct nd_region *nd_region)
{
	struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
	int i, idx;

	/*
	 * Try to encourage some diversity in flush hint addresses
	 * across cpus assuming a limited number of flush hints.
	 */
	idx = this_cpu_read(flush_idx);
	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));

	/*
	 * The first wmb() is needed to 'sfence' all previous writes
	 * such that they are architecturally visible for the platform
	 * buffer flush.  Note that we've already arranged for pmem
	 * writes to avoid the cache via memcpy_flushcache().  The final
	 * wmb() ensures ordering for the NVDIMM flush write.
	 */
	wmb();
	for (i = 0; i < nd_region->ndr_mappings; i++)
		if (ndrd_get_flush_wpq(ndrd, i, 0))
			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
	wmb();

	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
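/*
 * Illustrative sketch (not part of this file): a typical pmem write path
 * bypasses the cpu cache and then drains the posted-write queues, roughly:
 *
 *	memcpy_flushcache(pmem_addr, buf, len);
 *	if (nvdimm_flush(nd_region, NULL) < 0)
 *		... report the flush failure ...
 *
 * Passing a NULL bio selects the synchronous path; a region-provided
 * flush() callback may instead complete asynchronously against a bio.
 */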

/**
 * nvdimm_has_flush - determine write flushing requirements
 * @nd_region: blk or interleaved pmem region
 *
 * Returns 1 if writes require flushing
 * Returns 0 if writes do not require flushing
 * Returns -ENXIO if flushing capability can not be determined
 */
int nvdimm_has_flush(struct nd_region *nd_region)
{
	int i;

	/* no nvdimm or pmem api == flushing capability unknown */
	if (nd_region->ndr_mappings == 0
			|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
		return -ENXIO;

	for (i = 0; i < nd_region->ndr_mappings; i++) {
		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
		struct nvdimm *nvdimm = nd_mapping->nvdimm;

		/* flush hints present / available */
		if (nvdimm->num_flush)
			return 1;
	}

	/*
	 * The platform defines dimm devices without hints, assume a
	 * platform persistence mechanism like ADR.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_has_flush);
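/*
 * Illustrative sketch (not part of this file): callers typically treat the
 * three return values as "flush required", "flush is a nop", and "unknown,
 * be conservative", e.g.:
 *
 *	int has_flush = nvdimm_has_flush(nd_region);
 *
 *	if (has_flush < 0)
 *		... capability unknown, assume no reliable flush ...
 *	else if (has_flush == 0)
 *		... ADR-style platform, nvdimm_flush() is not required ...
 *	else
 *		... issue nvdimm_flush() for flush/FUA requests ...
 */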

int nvdimm_has_cache(struct nd_region *nd_region)
{
	return is_nd_pmem(&nd_region->dev) &&
		!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);

bool is_nvdimm_sync(struct nd_region *nd_region)
{
	if (is_nd_volatile(&nd_region->dev))
		return true;

	return is_nd_pmem(&nd_region->dev) &&
		!test_bit(ND_REGION_ASYNC, &nd_region->flags);
}
EXPORT_SYMBOL_GPL(is_nvdimm_sync);

struct conflict_context {
	struct nd_region *nd_region;
	resource_size_t start, size;
};

static int region_conflict(struct device *dev, void *data)
{
	struct nd_region *nd_region;
	struct conflict_context *ctx = data;
	resource_size_t res_end, region_end, region_start;

	if (!is_memory(dev))
		return 0;

	nd_region = to_nd_region(dev);
	if (nd_region == ctx->nd_region)
		return 0;

	res_end = ctx->start + ctx->size;
	region_start = nd_region->ndr_start;
	region_end = region_start + nd_region->ndr_size;
	if (ctx->start >= region_start && ctx->start < region_end)
		return -EBUSY;
	if (res_end > region_start && res_end <= region_end)
		return -EBUSY;
	return 0;
}

int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
		resource_size_t size)
{
	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
	struct conflict_context ctx = {
		.nd_region = nd_region,
		.start = start,
		.size = size,
	};

	return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict);
}

void __exit nd_region_devs_exit(void)
{
	ida_destroy(&region_ida);
}