// SPDX-License-Identifier: GPL-2.0
/*
 *  arch/sparc64/mm/init.c
 *
 *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
 *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */
 
#include <linux/extable.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/kprobes.h>
#include <linux/cache.h>
#include <linux/sort.h>
#include <linux/ioport.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/gfp.h>

#include <asm/head.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
#include <asm/tsb.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/cpudata.h>
#include <asm/setup.h>
#include <asm/irq.h>

#include "init_64.h"

unsigned long kern_linear_pte_xor[4] __read_mostly;
static unsigned long page_cache4v_flag;

/* A bitmap, two bits for every 256MB of physical memory.  These two
 * bits determine what page size we use for kernel linear
 * translations.  They form an index into kern_linear_pte_xor[].  The
 * value in the indexed slot is XOR'd with the TLB miss virtual
 * address to form the resulting TTE.  The mapping is:
 *
 *	0	==>	4MB
 *	1	==>	256MB
 *	2	==>	2GB
 *	3	==>	16GB
 *
 * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
 * support 2GB pages, and hopefully future cpus will support the 16GB
 * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
 * if these larger page sizes are not supported by the cpu.
 *
 * It would be nice to determine this from the machine description
 * 'cpu' properties, but we need to have this table set up before the
 * MDESC is initialized.
 */
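/* Example: a TLB miss on a linear address whose 256MB slice has
 * bitmap value 2 gets its address XOR'd with kern_linear_pte_xor[2],
 * yielding a 2GB page TTE (or a 256MB one on cpus without 2GB
 * support, per the fallback encoding described above).
 */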

#ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
 * Space is allocated for this right after the trap table in
 * arch/sparc64/kernel/head.S
 */
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static unsigned long cpu_pgsz_mask;

#define MAX_BANKS	1024

static struct linux_prom64_registers pavail[MAX_BANKS];
static int pavail_ents;

u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];

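/* sort() comparator: order memory banks by ascending physical address. */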
static int cmp_p64(const void *a, const void *b)
{
	const struct linux_prom64_registers *x = a, *y = b;

	if (x->phys_addr > y->phys_addr)
		return 1;
	if (x->phys_addr < y->phys_addr)
		return -1;
	return 0;
}

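/* Read an OBP memory list property from the /memory node into regs[]:
 * page-align every bank, drop banks that become empty, and sort the
 * result by ascending physical address.
 */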
static void __init read_obp_memory(const char *property,
				   struct linux_prom64_registers *regs,
				   int *num_ents)
{
	phandle node = prom_finddevice("/memory");
	int prop_size = prom_getproplen(node, property);
	int ents, ret, i;

	ents = prop_size / sizeof(struct linux_prom64_registers);
	if (ents > MAX_BANKS) {
		prom_printf("The machine has more %s property entries than "
			    "this kernel can support (%d).\n",
			    property, MAX_BANKS);
		prom_halt();
	}

	ret = prom_getproperty(node, property, (char *) regs, prop_size);
	if (ret == -1) {
		prom_printf("Couldn't get %s property from /memory.\n",
				property);
		prom_halt();
	}

	/* Sanitize what we got from the firmware, by page aligning
	 * everything.
	 */
	for (i = 0; i < ents; i++) {
		unsigned long base, size;

		base = regs[i].phys_addr;
		size = regs[i].reg_size;

		size &= PAGE_MASK;
		if (base & ~PAGE_MASK) {
			unsigned long new_base = PAGE_ALIGN(base);

			size -= new_base - base;
			if ((long) size < 0L)
				size = 0UL;
			base = new_base;
		}
		if (size == 0UL) {
			/* If it is empty, simply get rid of it.
			 * This simplifies the logic of the other
			 * functions that process these arrays.
			 */
			memmove(&regs[i], &regs[i + 1],
				(ents - i - 1) * sizeof(regs[0]));
			i--;
			ents--;
			continue;
		}
		regs[i].phys_addr = base;
		regs[i].reg_size = size;
	}

	*num_ents = ents;

	sort(regs, ents, sizeof(struct linux_prom64_registers),
	     cmp_p64, NULL);
}

/* Kernel physical address base and size in bytes.  */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;

/* Initial ramdisk setup */
extern unsigned long sparc_ramdisk_image64;
extern unsigned int sparc_ramdisk_image;
extern unsigned int sparc_ramdisk_size;

struct page *mem_map_zero __read_mostly;
EXPORT_SYMBOL(mem_map_zero);

unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;

unsigned long sparc64_kern_pri_context __read_mostly;
unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
unsigned long sparc64_kern_sec_context __read_mostly;

int num_kernel_image_mappings;

#ifdef CONFIG_DEBUG_DCFLUSH
atomic_t dcpage_flushes = ATOMIC_INIT(0);
#ifdef CONFIG_SMP
atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
#endif
#endif

inline void flush_dcache_page_impl(struct page *page)
{
	BUG_ON(tlb_type == hypervisor);
#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping_file(page) != NULL));
#else
	if (page_mapping_file(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

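/* D-cache dirty state is tracked in page->flags: a dirty bit
 * (PG_arch_1) plus, starting at bit 32, the id of the cpu which
 * dirtied the page.  The helpers below update both atomically.
 */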
#define PG_dcache_dirty		PG_arch_1
#define PG_dcache_cpu_shift	32UL
#define PG_dcache_cpu_mask	\
	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)

#define dcache_dirty_cpu(page) \
	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)

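/* Atomically mark 'page' d-cache dirty and record 'this_cpu' as the
 * owning cpu, using a casx compare-and-swap loop on page->flags.
 */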
static inline void set_dcache_dirty(struct page *page, int this_cpu)
{
	unsigned long mask = this_cpu;
	unsigned long non_cpu_bits;

	non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
	mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);

	__asm__ __volatile__("1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "and	%%g7, %1, %%g1\n\t"
			     "or	%%g1, %0, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop"
			     : /* no outputs */
			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
			     : "g1", "g7");
}

static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
{
	unsigned long mask = (1UL << PG_dcache_dirty);

	__asm__ __volatile__("! test_and_clear_dcache_dirty\n"
			     "1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "srlx	%%g7, %4, %%g1\n\t"
			     "and	%%g1, %3, %%g1\n\t"
			     "cmp	%%g1, %0\n\t"
			     "bne,pn	%%icc, 2f\n\t"
			     " andn	%%g7, %1, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop\n"
			     "2:"
			     : /* no outputs */
			     : "r" (cpu), "r" (mask), "r" (&page->flags),
			       "i" (PG_dcache_cpu_mask),
			       "i" (PG_dcache_cpu_shift)
			     : "g1", "g7");
}

static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
{
	unsigned long tsb_addr = (unsigned long) ent;

	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		tsb_addr = __pa(tsb_addr);

	__tsb_insert(tsb_addr, tag, pte);
}

unsigned long _PAGE_ALL_SZ_BITS __read_mostly;

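/* If the page at 'pfn' was marked d-cache dirty, flush it on the cpu
 * that dirtied it (locally or via a cross call) and clear the dirty
 * state.
 */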
static void flush_dcache(unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	if (page) {
		unsigned long pg_flags;

		pg_flags = page->flags;
		if (pg_flags & (1UL << PG_dcache_dirty)) {
			int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
				   PG_dcache_cpu_mask);
			int this_cpu = get_cpu();

			/* This is just to optimize away some function calls
			 * in the SMP case.
			 */
			if (cpu == this_cpu)
				flush_dcache_page_impl(page);
			else
				smp_flush_dcache_page_impl(page, cpu);

			clear_dcache_dirty_cpu(page, cpu);

			put_cpu();
		}
	}
}

/* mm->context.lock must be held */
static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
				    unsigned long tsb_hash_shift, unsigned long address,
				    unsigned long tte)
{
	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
	unsigned long tag;

	if (unlikely(!tsb))
		return;

	tsb += ((address >> tsb_hash_shift) &
		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
	tag = (address >> 22UL);
	tsb_insert(tsb, tag, tte);
}

#ifdef CONFIG_HUGETLB_PAGE
static void __init add_huge_page_size(unsigned long size)
{
	unsigned int order;

	if (size_to_hstate(size))
		return;

	order = ilog2(size) - PAGE_SHIFT;
	hugetlb_add_hstate(order);
}

static int __init hugetlbpage_init(void)
{
	add_huge_page_size(1UL << HPAGE_64K_SHIFT);
	add_huge_page_size(1UL << HPAGE_SHIFT);
	add_huge_page_size(1UL << HPAGE_256MB_SHIFT);
	add_huge_page_size(1UL << HPAGE_2GB_SHIFT);

	return 0;
}

arch_initcall(hugetlbpage_init);

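/* Apply the one-instruction patch recorded at __pud_huge_patch and
 * flush it from the I-cache.  Only invoked when a 16GB (PUD sized)
 * hugepagesz is requested below.
 */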
static void __init pud_huge_patch(void)
{
	struct pud_huge_patch_entry *p;
	unsigned long addr;

	p = &__pud_huge_patch;
	addr = p->addr;
	*(unsigned int *)addr = p->insn;

	__asm__ __volatile__("flush %0" : : "r" (addr));
}

static int __init setup_hugepagesz(char *string)
{
	unsigned long long hugepage_size;
	unsigned int hugepage_shift;
	unsigned short hv_pgsz_idx;
	unsigned int hv_pgsz_mask;
	int rc = 0;

	hugepage_size = memparse(string, &string);
	hugepage_shift = ilog2(hugepage_size);

	switch (hugepage_shift) {
	case HPAGE_16GB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_16GB;
		hv_pgsz_idx = HV_PGSZ_IDX_16GB;
		pud_huge_patch();
		break;
	case HPAGE_2GB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
		break;
	case HPAGE_256MB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_256MB;
		hv_pgsz_idx = HV_PGSZ_IDX_256MB;
		break;
	case HPAGE_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_4MB;
		hv_pgsz_idx = HV_PGSZ_IDX_4MB;
		break;
	case HPAGE_64K_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_64K;
		hv_pgsz_idx = HV_PGSZ_IDX_64K;
		break;
	default:
		hv_pgsz_mask = 0;
	}

	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
		hugetlb_bad_size();
		pr_err("hugepagesz=%llu not supported by MMU.\n",
			hugepage_size);
		goto out;
	}

	add_huge_page_size(hugepage_size);
	rc = 1;

out:
	return rc;
}
__setup("hugepagesz=", setup_hugepagesz);
#endif	/* CONFIG_HUGETLB_PAGE */

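/* Called after a PTE update: preload the new translation into the
 * mm's TSB so the next access can be resolved without a TSB miss.
 */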
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	struct mm_struct *mm;
	unsigned long flags;
	bool is_huge_tsb;
	pte_t pte = *ptep;

	if (tlb_type != hypervisor) {
		unsigned long pfn = pte_pfn(pte);

		if (pfn_valid(pfn))
			flush_dcache(pfn);
	}

	mm = vma->vm_mm;

	/* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
	if (!pte_accessible(mm, pte))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	is_huge_tsb = false;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
		unsigned long hugepage_size = PAGE_SIZE;

		if (is_vm_hugetlb_page(vma))
			hugepage_size = huge_page_size(hstate_vma(vma));

		if (hugepage_size >= PUD_SIZE) {
			unsigned long mask = 0x1ffc00000UL;

			/* Transfer bits [32:22] from address to resolve
			 * at 4M granularity.
			 */
			pte_val(pte) &= ~mask;
			pte_val(pte) |= (address & mask);
		} else if (hugepage_size >= PMD_SIZE) {
			/* We are fabricating 8MB pages using 4MB
			 * real hw pages.
			 */
			pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
		}

		if (hugepage_size >= PMD_SIZE) {
			__update_mmu_tsb_insert(mm, MM_TSB_HUGE,
				REAL_HPAGE_SHIFT, address, pte_val(pte));
			is_huge_tsb = true;
		}
	}
#endif
	if (!is_huge_tsb)
		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
					address, pte_val(pte));

	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_dcache_page(struct page *page)
{
	struct address_space *mapping;
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

	/* Do not bother with the expensive D-cache flush if it
	 * is merely the zero page.  The 'bigcore' testcase in GDB
	 * causes this case to run millions of times.
	 */
	if (page == ZERO_PAGE(0))
		return;

	this_cpu = get_cpu();

	mapping = page_mapping_file(page);
	if (mapping && !mapping_mapped(mapping)) {
		int dirty = test_bit(PG_dcache_dirty, &page->flags);
		if (dirty) {
			int dirty_cpu = dcache_dirty_cpu(page);

			if (dirty_cpu == this_cpu)
				goto out;
			smp_flush_dcache_page_impl(page, dirty_cpu);
		}
		set_dcache_dirty(page, this_cpu);
	} else {
		/* We could delay the flush for the !page_mapping
		 * case too.  But that case is for exec env/arg
		 * pages and those are 99% certainly going to get
		 * faulted into the tlb (and thus flushed) anyways.
		 */
		flush_dcache_page_impl(page);
	}

out:
	put_cpu();
}
EXPORT_SYMBOL(flush_dcache_page);

void __kprobes flush_icache_range(unsigned long start, unsigned long end)
{
	/* Cheetah and Hypervisor platform cpus have coherent I-cache. */
	if (tlb_type == spitfire) {
		unsigned long kaddr;

		/* This code only runs on Spitfire cpus so this is
		 * why we can assume _PAGE_PADDR_4U.
		 */
		for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
			unsigned long paddr, mask = _PAGE_PADDR_4U;

			if (kaddr >= PAGE_OFFSET)
				paddr = kaddr & mask;
			else {
				pgd_t *pgdp = pgd_offset_k(kaddr);
				pud_t *pudp = pud_offset(pgdp, kaddr);
				pmd_t *pmdp = pmd_offset(pudp, kaddr);
				pte_t *ptep = pte_offset_kernel(pmdp, kaddr);

				paddr = pte_val(*ptep) & mask;
			}
			__flush_icache_page(paddr);
		}
	}
}
EXPORT_SYMBOL(flush_icache_range);

void mmu_info(struct seq_file *m)
{
	static const char *pgsz_strings[] = {
		"8K", "64K", "512K", "4MB", "32MB",
		"256MB", "2GB", "16GB",
	};
	int i, printed;

	if (tlb_type == cheetah)
		seq_printf(m, "MMU Type\t: Cheetah\n");
	else if (tlb_type == cheetah_plus)
		seq_printf(m, "MMU Type\t: Cheetah+\n");
	else if (tlb_type == spitfire)
		seq_printf(m, "MMU Type\t: Spitfire\n");
	else if (tlb_type == hypervisor)
		seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
	else
		seq_printf(m, "MMU Type\t: ???\n");

	seq_printf(m, "MMU PGSZs\t: ");
	printed = 0;
	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
		if (cpu_pgsz_mask & (1UL << i)) {
			seq_printf(m, "%s%s",
				   printed ? "," : "", pgsz_strings[i]);
			printed++;
		}
	}
	seq_putc(m, '\n');

#ifdef CONFIG_DEBUG_DCFLUSH
	seq_printf(m, "DCPageFlushes\t: %d\n",
		   atomic_read(&dcpage_flushes));
#ifdef CONFIG_SMP
	seq_printf(m, "DCPageFlushesXC\t: %d\n",
		   atomic_read(&dcpage_flushes_xcall));
#endif /* CONFIG_SMP */
#endif /* CONFIG_DEBUG_DCFLUSH */
}

struct linux_prom_translation prom_trans[512] __read_mostly;
unsigned int prom_trans_ents __read_mostly;

unsigned long kern_locked_tte_data;

/* The obp translations are saved based on 8k pagesize, since obp can
 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
 * HI_OBP_ADDRESS range are handled in ktlb.S.
 */
static inline int in_obp_range(unsigned long vaddr)
{
	return (vaddr >= LOW_OBP_ADDRESS &&
		vaddr < HI_OBP_ADDRESS);
}

static int cmp_ptrans(const void *a, const void *b)
{
	const struct linux_prom_translation *x = a, *y = b;

	if (x->virt > y->virt)
		return 1;
	if (x->virt < y->virt)
		return -1;
	return 0;
}

/* Read OBP translations property into 'prom_trans[]'.  */
static void __init read_obp_translations(void)
{
	int n, node, ents, first, last, i;

	node = prom_finddevice("/virtual-memory");
	n = prom_getproplen(node, "translations");
	if (unlikely(n == 0 || n == -1)) {
		prom_printf("prom_mappings: Couldn't get size.\n");
		prom_halt();
	}
	if (unlikely(n > sizeof(prom_trans))) {
		prom_printf("prom_mappings: Size %d is too big.\n", n);
		prom_halt();
	}

	if ((n = prom_getproperty(node, "translations",
				  (char *)&prom_trans[0],
				  sizeof(prom_trans))) == -1) {
		prom_printf("prom_mappings: Couldn't get property.\n");
		prom_halt();
	}

	n = n / sizeof(struct linux_prom_translation);

	ents = n;

	sort(prom_trans, ents, sizeof(struct linux_prom_translation),
	     cmp_ptrans, NULL);

	/* Now kick out all the non-OBP entries.  */
	for (i = 0; i < ents; i++) {
		if (in_obp_range(prom_trans[i].virt))
			break;
	}
	first = i;
	for (; i < ents; i++) {
		if (!in_obp_range(prom_trans[i].virt))
			break;
	}
	last = i;

	for (i = 0; i < (last - first); i++) {
		struct linux_prom_translation *src = &prom_trans[i + first];
		struct linux_prom_translation *dest = &prom_trans[i];

		*dest = *src;
	}
	for (; i < ents; i++) {
		struct linux_prom_translation *dest = &prom_trans[i];
		dest->virt = dest->size = dest->data = 0x0UL;
	}

	prom_trans_ents = last - first;

	if (tlb_type == spitfire) {
		/* Clear diag TTE bits. */
		for (i = 0; i < prom_trans_ents; i++)
			prom_trans[i].data &= ~0x0003fe0000000000UL;
	}

	/* Force execute bit on.  */
	for (i = 0; i < prom_trans_ents; i++)
		prom_trans[i].data |= (tlb_type == hypervisor ?
				       _PAGE_EXEC_4V : _PAGE_EXEC_4U);
}

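/* Lock a translation into the sun4v I- or D-MMU with the
 * map-perm-addr hypervisor call; a failure this early in boot is
 * fatal.
 */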
static void __init hypervisor_tlb_lock(unsigned long vaddr,
				       unsigned long pte,
				       unsigned long mmu)
{
	unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);

	if (ret != 0) {
		prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
			    "errors with %lx\n", vaddr, 0, pte, mmu, ret);
		prom_halt();
	}
}

static unsigned long kern_large_tte(unsigned long paddr);

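/* Build the locked TTE for the 4MB-aligned kernel physical base and
 * pin one 4MB I+D mapping per kernel image mapping, via the
 * hypervisor on sun4v and via OBP's locked TLB entries otherwise.
 */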
static void __init remap_kernel(void)
{
	unsigned long phys_page, tte_vaddr, tte_data;
	int i, tlb_ent = sparc64_highest_locked_tlbent();

	tte_vaddr = (unsigned long) KERNBASE;
	phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
	tte_data = kern_large_tte(phys_page);

	kern_locked_tte_data = tte_data;

	/* Now lock us into the TLBs via Hypervisor or OBP. */
	if (tlb_type == hypervisor) {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
	} else {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
			prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
		sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
	}
	if (tlb_type == cheetah_plus) {
		sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
					    CTX_CHEETAH_PLUS_NUC);
		sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
		sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
	}
}


static void __init inherit_prom_mappings(void)
{
	/* Now fixup OBP's idea about where we really are mapped. */
	printk("Remapping the kernel... ");
	remap_kernel();
	printk("done.\n");
}

void prom_world(int enter)
{
	if (!enter)
		set_fs(get_fs());

	__asm__ __volatile__("flushw");
}

void __flush_dcache_range(unsigned long start, unsigned long end)
{
	unsigned long va;

	if (tlb_type == spitfire) {
		int n = 0;

		for (va = start; va < end; va += 32) {
			spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
			if (++n >= 512)
				break;
		}
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		start = __pa(start);
		end = __pa(end);
		for (va = start; va < end; va += 32)
			__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
					     "membar #Sync"
					     : /* no outputs */
					     : "r" (va),
					       "i" (ASI_DCACHE_INVALIDATE));
	}
}
EXPORT_SYMBOL(__flush_dcache_range);

/* get_new_mmu_context() uses "cache + 1".  */
DEFINE_SPINLOCK(ctx_alloc_lock);
unsigned long tlb_context_cache = CTX_FIRST_VERSION;
#define MAX_CTX_NR	(1UL << CTX_NR_BITS)
#define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};

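/* Handle wrap of the version field in tlb_context_cache: bump the
 * version, rebuild the context bitmap from scratch (keeping context 0
 * reserved for the kernel), and re-stamp contexts that are live in
 * cpus' secondary context registers so those mms do not all have to
 * come back through get_new_mmu_context().
 */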
static void mmu_context_wrap(void)
{
	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
	unsigned long new_ver, new_ctx, old_ctx;
	struct mm_struct *mm;
	int cpu;

	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);

	/* Reserve kernel context */
	set_bit(0, mmu_context_bmap);

	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
	if (unlikely(new_ver == 0))
		new_ver = CTX_FIRST_VERSION;
	tlb_context_cache = new_ver;

	/*
	 * Make sure that any new mm that is added into per_cpu_secondary_mm
	 * goes through the get_new_mmu_context() path.
	 */
	mb();

	/*
	 * Update the version to current on those CPUs that had valid
	 * secondary contexts.
	 */
	for_each_online_cpu(cpu) {
		/*
		 * If a new mm is stored after we took this mm from the array,
		 * it will go through the get_new_mmu_context() path, because we
		 * already bumped the version in tlb_context_cache.
		 */
		mm = per_cpu(per_cpu_secondary_mm, cpu);

		if (unlikely(!mm || mm == &init_mm))
			continue;

		old_ctx = mm->context.sparc64_ctx_val;
		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
			mm->context.sparc64_ctx_val = new_ctx;
		}
	}
}

/* Caller does TLB context flushing on local CPU if necessary.
 * The caller also ensures that CTX_VALID(mm->context) is false.
 *
 * We must be careful about boundary cases so that we never
 * let the user have CTX 0 (nucleus) or we ever use a CTX
 * version of zero (and thus NO_CONTEXT would not be caught
 * by version mis-match tests in mmu_context.h).
 *
 * Always invoked with interrupts disabled.
 */
void get_new_mmu_context(struct mm_struct *mm)
{
	unsigned long ctx, new_ctx;
	unsigned long orig_pgsz_bits;

	spin_lock(&ctx_alloc_lock);
retry:
	/* wrap might have happened, test again if our context became valid */
	if (unlikely(CTX_VALID(mm->context)))
		goto out;
	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
	if (new_ctx >= (1 << CTX_NR_BITS)) {
		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
		if (new_ctx >= ctx) {
			mmu_context_wrap();
			goto retry;
		}
	}
	if (mm->context.sparc64_ctx_val)
		cpumask_clear(mm_cpumask(mm));
	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
	tlb_context_cache = new_ctx;
	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
out:
	spin_unlock(&ctx_alloc_lock);
}

static int numa_enabled = 1;
static int numa_debug;

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);

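/* Debug printout helper, enabled by "numa=debug" on the command line. */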
#define numadbg(f, a...) \
do {	if (numa_debug) \
		printk(KERN_INFO f, ## a); \
} while (0)

static void __init find_ramdisk(unsigned long phys_base)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
		unsigned long ramdisk_image;

		/* Older versions of the bootloader only supported a
		 * 32-bit physical address for the ramdisk image
		 * location, stored at sparc_ramdisk_image.  Newer
		 * SILO versions set sparc_ramdisk_image to zero and
		 * provide a full 64-bit physical address at
		 * sparc_ramdisk_image64.
		 */
		ramdisk_image = sparc_ramdisk_image;
		if (!ramdisk_image)
			ramdisk_image = sparc_ramdisk_image64;

		/* Another bootloader quirk.  The bootloader normalizes
		 * the physical address to KERNBASE, so we have to
		 * factor that back out and add in the lowest valid
		 * physical page address to get the true physical address.
		 */
		ramdisk_image -= KERNBASE;
		ramdisk_image += phys_base;

		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
			ramdisk_image, sparc_ramdisk_size);

		initrd_start = ramdisk_image;
		initrd_end = ramdisk_image + sparc_ramdisk_size;

		memblock_reserve(initrd_start, sparc_ramdisk_size);

		initrd_start += PAGE_OFFSET;
		initrd_end += PAGE_OFFSET;
	}
#endif
}

struct node_mem_mask {
	unsigned long mask;
	unsigned long match;
};
static struct node_mem_mask node_masks[MAX_NUMNODES];
static int num_node_masks;

#ifdef CONFIG_NEED_MULTIPLE_NODES

struct mdesc_mlgroup {
	u64	node;
	u64	latency;
	u64	match;
	u64	mask;
};

static struct mdesc_mlgroup *mlgroups;
static int num_mlgroups;

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];

struct mdesc_mblock {
	u64	base;
	u64	size;
	u64	offset; /* RA-to-PA */
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;

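/* Find the machine-description mblock containing 'addr'.  Only a
 * completely empty mblock list makes this return NULL, which the
 * caller treats as an error.
 */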
static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
{
	struct mdesc_mblock *m = NULL;
	int i;

	for (i = 0; i < num_mblocks; i++) {
		m = &mblocks[i];

		if (addr >= m->base &&
		    addr < (m->base + m->size)) {
			break;
		}
	}

	return m;
}

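/* sun4u flavor of memblock_nid_range(): walk [start, end) one page at
 * a time, matching each address against node_masks[], and return the
 * first boundary where the owning node changes, with *nid set to the
 * node of the leading range.
 */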
static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
{
	int prev_nid, new_nid;

	prev_nid = -1;
	for ( ; start < end; start += PAGE_SIZE) {
		for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
			struct node_mem_mask *p = &node_masks[new_nid];

			if ((start & p->mask) == p->match) {
				if (prev_nid == -1)
					prev_nid = new_nid;
				break;
			}
		}

		if (new_nid == num_node_masks) {
			prev_nid = 0;
			WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
				  start);
			break;
		}

		if (prev_nid != new_nid)
			break;
	}
	*nid = prev_nid;

	return start > end ? end : start;
}

static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
	u64 ret_end, pa_start, m_mask, m_match, m_end;
	struct mdesc_mblock *mblock;
	int _nid, i;

	if (tlb_type != hypervisor)
		return memblock_nid_range_sun4u(start, end, nid);

	mblock = addr_to_mblock(start);
	if (!mblock) {
		WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
			  start);

		_nid = 0;
		ret_end = end;
		goto done;
	}

	pa_start = start + mblock->offset;
	m_match = 0;
	m_mask = 0;

	for (_nid = 0; _nid < num_node_masks; _nid++) {
		struct node_mem_mask *const m = &node_masks[_nid];

		if ((pa_start & m->mask) == m->match) {
			m_match = m->match;
			m_mask = m->mask;
			break;
		}
	}

	if (num_node_masks == _nid) {
		/* We could not find a NUMA group, so default to 0, but let's
		 * search for a latency group anyway, so we can calculate the
		 * correct end address to return.
		 */
		_nid = 0;

		for (i = 0; i < num_mlgroups; i++) {
			struct mdesc_mlgroup *const m = &mlgroups[i];

			if ((pa_start & m->mask) == m->match) {
				m_match = m->match;
				m_mask = m->mask;
				break;
			}
		}

		if (i == num_mlgroups) {
			WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
				  start);

			ret_end = end;
			goto done;
		}
	}

	/*
	 * Each latency group has match and mask, and each memory block has an
	 * offset.  An address belongs to a latency group if its address matches
	 * the following formula: ((addr + offset) & mask) == match
	 * It is, however, slow to check every single page if it matches a
	 * particular latency group. As optimization we calculate end value by
	 * using bit arithmetics.
	 */
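	/* A sketch of that arithmetic: 1UL << __ffs(m_mask) is the size of
	 * one stripe of the latency group, so m_match plus that size is the
	 * physical-address end of the stripe we are in; adding the bits of
	 * pa_start above the mask field (via fls64) and subtracting the
	 * mblock offset converts that PA back into the RA we return.
	 */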
	m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
	m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
	ret_end = m_end > end ? end : m_end;

done:
	*nid = _nid;
	return ret_end;
}
#endif

/* This must be invoked after performing all of the necessary
 * memblock_set_node() calls for 'nid'.  We need to be able to get
 * correct data from get_pfn_range_for_nid().
 */
static void __init allocate_node_data(int nid)
{
	struct pglist_data *p;
	unsigned long start_pfn, end_pfn;
#ifdef CONFIG_NEED_MULTIPLE_NODES
	unsigned long paddr;

	paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid);
	if (!paddr) {
		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
		prom_halt();
	}
	NODE_DATA(nid) = __va(paddr);
	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

	NODE_DATA(nid)->node_id = nid;
#endif

	p = NODE_DATA(nid);

	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
	p->node_start_pfn = start_pfn;
	p->node_spanned_pages = end_pfn - start_pfn;
}

static void init_node_masks_nonnuma(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int i;
#endif

	numadbg("Initializing tables for non-numa.\n");

	node_masks[0].mask = 0;
	node_masks[0].match = 0;
	num_node_masks = 1;

#ifdef CONFIG_NEED_MULTIPLE_NODES
	for (i = 0; i < NR_CPUS; i++)
		numa_cpu_lookup_table[i] = 0;

	cpumask_setall(&numa_cpumask_lookup_table[0]);
#endif
}

#ifdef CONFIG_NEED_MULTIPLE_NODES
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

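/* Helpers for of_node_to_nid(): under a NUMA "group" node, find the
 * lowest-latency pio-latency-group and check whether it leads to a
 * device with the given cfg-handle.
 */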
static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
				   u32 cfg_handle)
{
	u64 arc;

	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		const u64 *val;

		val = mdesc_get_property(md, target,
					 "cfg-handle", NULL);
		if (val && *val == cfg_handle)
			return 0;
	}
	return -ENODEV;
}

static int scan_arcs_for_cfg_handle(struct mdesc_handle *md, u64 grp,
				    u32 cfg_handle)
{
	u64 arc, candidate, best_latency = ~(u64)0;

	candidate = MDESC_NODE_NULL;
	mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		const char *name = mdesc_node_name(md, target);
		const u64 *val;

		if (strcmp(name, "pio-latency-group"))
			continue;

		val = mdesc_get_property(md, target, "latency", NULL);
		if (!val)
			continue;

		if (*val < best_latency) {
			candidate = target;
			best_latency = *val;
		}
	}

	if (candidate == MDESC_NODE_NULL)
		return -ENODEV;

	return scan_pio_for_cfg_handle(md, candidate, cfg_handle);
}

int of_node_to_nid(struct device_node *dp)
{
	const struct linux_prom64_registers *regs;
	struct mdesc_handle *md;
	u32 cfg_handle;
	int count, nid;
	u64 grp;

	/* This is the right thing to do on currently supported
	 * SUN4U NUMA platforms as well, as the PCI controller does
	 * not sit behind any particular memory controller.
	 */
	if (!mlgroups)
		return -1;

	regs = of_get_property(dp, "reg", NULL);
	if (!regs)
		return -1;

	cfg_handle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	md = mdesc_grab();

	count = 0;
	nid = -1;
	mdesc_for_each_node_by_name(md, grp, "group") {
		if (!scan_arcs_for_cfg_handle(md, grp, cfg_handle)) {
			nid = count;
			break;
		}
		count++;
	}

	mdesc_release(md);

	return nid;
}

static void __init add_node_ranges(void)
{
	struct memblock_region *reg;
	unsigned long prev_max;

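	/* memblock_set_node() below may grow memblock.memory and thereby
	 * invalidate our region iterator; snapshot the array size and
	 * restart the whole walk whenever it changes.
	 */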
memblock_resized:
	prev_max = memblock.memory.max;

	for_each_memblock(memory, reg) {
		unsigned long size = reg->size;
		unsigned long start, end;

		start = reg->base;
		end = start + size;
		while (start < end) {
			unsigned long this_end;
			int nid;

			this_end = memblock_nid_range(start, end, &nid);

			numadbg("Setting memblock NUMA node nid[%d] "
				"start[%lx] end[%lx]\n",
				nid, start, this_end);

			memblock_set_node(start, this_end - start,
					  &memblock.memory, nid);
			if (memblock.memory.max != prev_max)
				goto memblock_resized;
			start = this_end;
		}
	}
}

static int __init grab_mlgroups(struct mdesc_handle *md)
{
	unsigned long paddr;
	int count = 0;
	u64 node;

	mdesc_for_each_node_by_name(md, node, "memory-latency-group")
		count++;
	if (!count)
		return -ENOENT;

	paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup),
			  SMP_CACHE_BYTES);
	if (!paddr)
		return -ENOMEM;

	mlgroups = __va(paddr);
	num_mlgroups = count;

	count = 0;
	mdesc_for_each_node_by_name(md, node, "memory-latency-group") {
		struct mdesc_mlgroup *m = &mlgroups[count++];
		const u64 *val;

		m->node = node;

		val = mdesc_get_property(md, node, "latency", NULL);
		m->latency = *val;
		val = mdesc_get_property(md, node, "address-match", NULL);
		m->match = *val;
		val = mdesc_get_property(md, node, "address-mask", NULL);
		m->mask = *val;

		numadbg("MLGROUP[%d]: node[%llx] latency[%llx] "
			"match[%llx] mask[%llx]\n",
			count - 1, m->node, m->latency, m->match, m->mask);
	}

	return