/*
 *  arch/sparc64/mm/init.c
 *
 *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
 *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */
 
#include <linux/extable.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/kprobes.h>
#include <linux/cache.h>
#include <linux/sort.h>
#include <linux/ioport.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/gfp.h>

#include <asm/head.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
#include <asm/tsb.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/cpudata.h>
#include <asm/setup.h>
#include <asm/irq.h>

#include "init_64.h"

unsigned long kern_linear_pte_xor[4] __read_mostly;
static unsigned long page_cache4v_flag;

/* A bitmap, two bits for every 256MB of physical memory.  These two
 * bits determine what page size we use for kernel linear
 * translations.  They form an index into kern_linear_pte_xor[].  The
 * value in the indexed slot is XOR'd with the TLB miss virtual
 * address to form the resulting TTE.  The mapping is:
 *
 *	0	==>	4MB
 *	1	==>	256MB
 *	2	==>	2GB
 *	3	==>	16GB
 *
 * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
 * support 2GB pages, and hopefully future cpus will support the 16GB
 * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
 * if these larger page sizes are not supported by the cpu.
 *
 * It would be nice to determine this from the machine description
 * 'cpu' properties, but we need to have this table set up before the
 * MDESC is initialized.
 */
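
/* Illustrative sketch, not part of the original file: how a two-bit
 * index pulled from such a bitmap selects the XOR value for a
 * linear-mapping TLB miss.  The bitmap parameter and the helper name
 * are hypothetical, for illustration only; the real lookup is done in
 * assembly by the TLB miss handlers.
 */
static inline unsigned long __maybe_unused
example_kern_linear_tte(unsigned long vaddr, unsigned long *bitmap)
{
	/* One two-bit slot per 256MB of the linear mapping. */
	unsigned long slot = (vaddr - PAGE_OFFSET) >> 28;
	/* A 64-bit word holds 32 two-bit slots. */
	unsigned long idx = (bitmap[slot >> 5] >> ((slot & 31UL) * 2)) & 3UL;

	return vaddr ^ kern_linear_pte_xor[idx];
}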

#ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
 * Space is allocated for this right after the trap table in
 * arch/sparc64/kernel/head.S
 */
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static unsigned long cpu_pgsz_mask;

#define MAX_BANKS	1024

static struct linux_prom64_registers pavail[MAX_BANKS];
static int pavail_ents;

u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];

static int cmp_p64(const void *a, const void *b)
{
	const struct linux_prom64_registers *x = a, *y = b;

	if (x->phys_addr > y->phys_addr)
		return 1;
	if (x->phys_addr < y->phys_addr)
		return -1;
	return 0;
}

static void __init read_obp_memory(const char *property,
				   struct linux_prom64_registers *regs,
				   int *num_ents)
{
	phandle node = prom_finddevice("/memory");
	int prop_size = prom_getproplen(node, property);
	int ents, ret, i;

	ents = prop_size / sizeof(struct linux_prom64_registers);
	if (ents > MAX_BANKS) {
		prom_printf("The machine has more %s property entries than "
			    "this kernel can support (%d).\n",
			    property, MAX_BANKS);
		prom_halt();
	}

	ret = prom_getproperty(node, property, (char *) regs, prop_size);
	if (ret == -1) {
		prom_printf("Couldn't get %s property from /memory.\n",
				property);
		prom_halt();
	}

	/* Sanitize what we got from the firmware, by page aligning
	 * everything.
	 */
	for (i = 0; i < ents; i++) {
		unsigned long base, size;

		base = regs[i].phys_addr;
		size = regs[i].reg_size;

		size &= PAGE_MASK;
		if (base & ~PAGE_MASK) {
			unsigned long new_base = PAGE_ALIGN(base);

			size -= new_base - base;
			if ((long) size < 0L)
				size = 0UL;
			base = new_base;
		}
		if (size == 0UL) {
			/* If it is empty, simply get rid of it.
			 * This simplifies the logic of the other
			 * functions that process these arrays.
			 */
			memmove(&regs[i], &regs[i + 1],
				(ents - i - 1) * sizeof(regs[0]));
			i--;
			ents--;
			continue;
		}
		regs[i].phys_addr = base;
		regs[i].reg_size = size;
	}

	*num_ents = ents;

	sort(regs, ents, sizeof(struct linux_prom64_registers),
	     cmp_p64, NULL);
}

/* Kernel physical address base and size in bytes.  */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;

/* Initial ramdisk setup */
extern unsigned long sparc_ramdisk_image64;
extern unsigned int sparc_ramdisk_image;
extern unsigned int sparc_ramdisk_size;

struct page *mem_map_zero __read_mostly;
EXPORT_SYMBOL(mem_map_zero);

unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;

unsigned long sparc64_kern_pri_context __read_mostly;
unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
unsigned long sparc64_kern_sec_context __read_mostly;

int num_kernel_image_mappings;

#ifdef CONFIG_DEBUG_DCFLUSH
atomic_t dcpage_flushes = ATOMIC_INIT(0);
#ifdef CONFIG_SMP
atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
#endif
#endif

inline void flush_dcache_page_impl(struct page *page)
{
	BUG_ON(tlb_type == hypervisor);
#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

#define PG_dcache_dirty		PG_arch_1
#define PG_dcache_cpu_shift	32UL
#define PG_dcache_cpu_mask	\
	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)

#define dcache_dirty_cpu(page) \
	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)

static inline void set_dcache_dirty(struct page *page, int this_cpu)
{
	unsigned long mask = this_cpu;
	unsigned long non_cpu_bits;

	non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
	mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);

	__asm__ __volatile__("1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "and	%%g7, %1, %%g1\n\t"
			     "or	%%g1, %0, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop"
			     : /* no outputs */
			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
			     : "g1", "g7");
}

static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
{
	unsigned long mask = (1UL << PG_dcache_dirty);

	__asm__ __volatile__("! test_and_clear_dcache_dirty\n"
			     "1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "srlx	%%g7, %4, %%g1\n\t"
			     "and	%%g1, %3, %%g1\n\t"
			     "cmp	%%g1, %0\n\t"
			     "bne,pn	%%icc, 2f\n\t"
			     " andn	%%g7, %1, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop\n"
			     "2:"
			     : /* no outputs */
			     : "r" (cpu), "r" (mask), "r" (&page->flags),
			       "i" (PG_dcache_cpu_mask),
			       "i" (PG_dcache_cpu_shift)
			     : "g1", "g7");
}
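
/* Both helpers above are compare-and-swap loops on page->flags: the
 * casx retries until the flags word is updated without racing a
 * concurrent modification.  set_dcache_dirty() records the dirtying
 * cpu and sets the dirty bit in one atomic update, while
 * clear_dcache_dirty_cpu() clears the dirty bit only if 'cpu' is still
 * the recorded owner.
 */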

static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
{
	unsigned long tsb_addr = (unsigned long) ent;

	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		tsb_addr = __pa(tsb_addr);

	__tsb_insert(tsb_addr, tag, pte);
}

unsigned long _PAGE_ALL_SZ_BITS __read_mostly;

static void flush_dcache(unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	if (page) {
		unsigned long pg_flags;

		pg_flags = page->flags;
		if (pg_flags & (1UL << PG_dcache_dirty)) {
			int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
				   PG_dcache_cpu_mask);
			int this_cpu = get_cpu();

			/* This is just to optimize away some function calls
			 * in the SMP case.
			 */
			if (cpu == this_cpu)
				flush_dcache_page_impl(page);
			else
				smp_flush_dcache_page_impl(page, cpu);

			clear_dcache_dirty_cpu(page, cpu);

			put_cpu();
		}
	}
}

/* mm->context.lock must be held */
static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
				    unsigned long tsb_hash_shift, unsigned long address,
				    unsigned long tte)
{
	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
	unsigned long tag;

	if (unlikely(!tsb))
		return;

	tsb += ((address >> tsb_hash_shift) &
		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
	tag = (address >> 22UL);
	tsb_insert(tsb, tag, tte);
}
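
/* The TSB tag stored above is the virtual address shifted down by 22
 * bits, which matches the tag-target format the TSB miss handlers
 * compare against on a miss.
 */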

#ifdef CONFIG_HUGETLB_PAGE
static int __init setup_hugepagesz(char *string)
{
	unsigned long long hugepage_size;
	unsigned int hugepage_shift;
	unsigned short hv_pgsz_idx;
	unsigned int hv_pgsz_mask;
	int rc = 0;

	hugepage_size = memparse(string, &string);
	hugepage_shift = ilog2(hugepage_size);

	switch (hugepage_shift) {
	case HPAGE_256MB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_256MB;
		hv_pgsz_idx = HV_PGSZ_IDX_256MB;
		break;
	case HPAGE_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_4MB;
		hv_pgsz_idx = HV_PGSZ_IDX_4MB;
		break;
	case HPAGE_64K_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_64K;
		hv_pgsz_idx = HV_PGSZ_IDX_64K;
		break;
	default:
		hv_pgsz_mask = 0;
	}

	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
		pr_warn("hugepagesz=%llu not supported by MMU.\n",
			hugepage_size);
		goto out;
	}

	hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
	rc = 1;

out:
	return rc;
}
__setup("hugepagesz=", setup_hugepagesz);
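
/* Example usage on the kernel command line, assuming the MMU reports
 * 256MB page support in cpu_pgsz_mask:
 *
 *	hugepagesz=256M hugepages=16
 */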
#endif	/* CONFIG_HUGETLB_PAGE */

void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	struct mm_struct *mm;
	unsigned long flags;
	pte_t pte = *ptep;

	if (tlb_type != hypervisor) {
		unsigned long pfn = pte_pfn(pte);

		if (pfn_valid(pfn))
			flush_dcache(pfn);
	}

	mm = vma->vm_mm;

	/* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
	if (!pte_accessible(mm, pte))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
	    is_hugetlb_pmd(__pmd(pte_val(pte)))) {
		/* We are fabricating 8MB pages using 4MB real hw pages.  */
		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
					address, pte_val(pte));
	} else
#endif
		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
					address, pte_val(pte));

	spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_dcache_page(struct page *page)
{
	struct address_space *mapping;
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

	/* Do not bother with the expensive D-cache flush if it
	 * is merely the zero page.  The 'bigcore' testcase in GDB
	 * causes this case to run millions of times.
	 */
	if (page == ZERO_PAGE(0))
		return;

	this_cpu = get_cpu();

	mapping = page_mapping(page);
	if (mapping && !mapping_mapped(mapping)) {
		int dirty = test_bit(PG_dcache_dirty, &page->flags);
		if (dirty) {
			int dirty_cpu = dcache_dirty_cpu(page);

			if (dirty_cpu == this_cpu)
				goto out;
			smp_flush_dcache_page_impl(page, dirty_cpu);
		}
		set_dcache_dirty(page, this_cpu);
	} else {
		/* We could delay the flush for the !page_mapping
		 * case too.  But that case is for exec env/arg
		 * pages and those are 99% certain to get
		 * faulted into the tlb (and thus flushed) anyways.
		 */
		flush_dcache_page_impl(page);
	}

out:
	put_cpu();
}
EXPORT_SYMBOL(flush_dcache_page);

void __kprobes flush_icache_range(unsigned long start, unsigned long end)
{
	/* Cheetah and Hypervisor platform cpus have coherent I-cache. */
	if (tlb_type == spitfire) {
		unsigned long kaddr;

		/* This code only runs on Spitfire cpus so this is
		 * why we can assume _PAGE_PADDR_4U.
		 */
		for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
			unsigned long paddr, mask = _PAGE_PADDR_4U;

			if (kaddr >= PAGE_OFFSET)
				paddr = kaddr & mask;
			else {
				pgd_t *pgdp = pgd_offset_k(kaddr);
				pud_t *pudp = pud_offset(pgdp, kaddr);
				pmd_t *pmdp = pmd_offset(pudp, kaddr);
				pte_t *ptep = pte_offset_kernel(pmdp, kaddr);

				paddr = pte_val(*ptep) & mask;
			}
			__flush_icache_page(paddr);
		}
	}
}
EXPORT_SYMBOL(flush_icache_range);

void mmu_info(struct seq_file *m)
{
	static const char *pgsz_strings[] = {
		"8K", "64K", "512K", "4MB", "32MB",
		"256MB", "2GB", "16GB",
	};
	int i, printed;

	if (tlb_type == cheetah)
		seq_printf(m, "MMU Type\t: Cheetah\n");
	else if (tlb_type == cheetah_plus)
		seq_printf(m, "MMU Type\t: Cheetah+\n");
	else if (tlb_type == spitfire)
		seq_printf(m, "MMU Type\t: Spitfire\n");
	else if (tlb_type == hypervisor)
		seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
	else
		seq_printf(m, "MMU Type\t: ???\n");

	seq_printf(m, "MMU PGSZs\t: ");
	printed = 0;
	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
		if (cpu_pgsz_mask & (1UL << i)) {
			seq_printf(m, "%s%s",
				   printed ? "," : "", pgsz_strings[i]);
			printed++;
		}
	}
	seq_putc(m, '\n');

#ifdef CONFIG_DEBUG_DCFLUSH
	seq_printf(m, "DCPageFlushes\t: %d\n",
		   atomic_read(&dcpage_flushes));
#ifdef CONFIG_SMP
	seq_printf(m, "DCPageFlushesXC\t: %d\n",
		   atomic_read(&dcpage_flushes_xcall));
#endif /* CONFIG_SMP */
#endif /* CONFIG_DEBUG_DCFLUSH */
}

struct linux_prom_translation prom_trans[512] __read_mostly;
unsigned int prom_trans_ents __read_mostly;

unsigned long kern_locked_tte_data;

/* The obp translations are saved based on 8k pagesize, since obp can
 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
 * HI_OBP_ADDRESS range are handled in ktlb.S.
 */
static inline int in_obp_range(unsigned long vaddr)
{
	return (vaddr >= LOW_OBP_ADDRESS &&
		vaddr < HI_OBP_ADDRESS);
}

static int cmp_ptrans(const void *a, const void *b)
{
	const struct linux_prom_translation *x = a, *y = b;

	if (x->virt > y->virt)
		return 1;
	if (x->virt < y->virt)
		return -1;
	return 0;
}

/* Read OBP translations property into 'prom_trans[]'.  */
static void __init read_obp_translations(void)
{
	int n, node, ents, first, last, i;

	node = prom_finddevice("/virtual-memory");
	n = prom_getproplen(node, "translations");
	if (unlikely(n == 0 || n == -1)) {
		prom_printf("prom_mappings: Couldn't get size.\n");
		prom_halt();
	}
	if (unlikely(n > sizeof(prom_trans))) {
		prom_printf("prom_mappings: Size %d is too big.\n", n);
		prom_halt();
	}

	if ((n = prom_getproperty(node, "translations",
				  (char *)&prom_trans[0],
				  sizeof(prom_trans))) == -1) {
		prom_printf("prom_mappings: Couldn't get property.\n");
		prom_halt();
	}

	n = n / sizeof(struct linux_prom_translation);

	ents = n;

	sort(prom_trans, ents, sizeof(struct linux_prom_translation),
	     cmp_ptrans, NULL);

	/* Now kick out all the non-OBP entries.  */
	for (i = 0; i < ents; i++) {
		if (in_obp_range(prom_trans[i].virt))
			break;
	}
	first = i;
	for (; i < ents; i++) {
		if (!in_obp_range(prom_trans[i].virt))
			break;
	}
	last = i;

	for (i = 0; i < (last - first); i++) {
		struct linux_prom_translation *src = &prom_trans[i + first];
		struct linux_prom_translation *dest = &prom_trans[i];

		*dest = *src;
	}
	for (; i < ents; i++) {
		struct linux_prom_translation *dest = &prom_trans[i];
		dest->virt = dest->size = dest->data = 0x0UL;
	}

	prom_trans_ents = last - first;

	if (tlb_type == spitfire) {
		/* Clear diag TTE bits. */
		for (i = 0; i < prom_trans_ents; i++)
			prom_trans[i].data &= ~0x0003fe0000000000UL;
	}

	/* Force execute bit on.  */
	for (i = 0; i < prom_trans_ents; i++)
		prom_trans[i].data |= (tlb_type == hypervisor ?
				       _PAGE_EXEC_4V : _PAGE_EXEC_4U);
}

static void __init hypervisor_tlb_lock(unsigned long vaddr,
				       unsigned long pte,
				       unsigned long mmu)
{
	unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);

	if (ret != 0) {
		prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
			    "errors with %lx\n", vaddr, 0, pte, mmu, ret);
		prom_halt();
	}
}

static unsigned long kern_large_tte(unsigned long paddr);

static void __init remap_kernel(void)
{
	unsigned long phys_page, tte_vaddr, tte_data;
	int i, tlb_ent = sparc64_highest_locked_tlbent();

	tte_vaddr = (unsigned long) KERNBASE;
	phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
	tte_data = kern_large_tte(phys_page);

	kern_locked_tte_data = tte_data;

	/* Now lock us into the TLBs via Hypervisor or OBP. */
	if (tlb_type == hypervisor) {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
	} else {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
			prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
		sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
	}
	if (tlb_type == cheetah_plus) {
		sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
					    CTX_CHEETAH_PLUS_NUC);
		sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
		sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
	}
}

static void __init inherit_prom_mappings(void)
{
	/* Now fixup OBP's idea about where we really are mapped. */
	printk("Remapping the kernel... ");
	remap_kernel();
	printk("done.\n");
}

void prom_world(int enter)
{
	if (!enter)
		set_fs(get_fs());

	__asm__ __volatile__("flushw");
}

void __flush_dcache_range(unsigned long start, unsigned long end)
{
	unsigned long va;

	if (tlb_type == spitfire) {
		int n = 0;

		for (va = start; va < end; va += 32) {
			spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
			if (++n >= 512)
				break;
		}
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		start = __pa(start);
		end = __pa(end);
		for (va = start; va < end; va += 32)
			__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
					     "membar #Sync"
					     : /* no outputs */
					     : "r" (va),
					       "i" (ASI_DCACHE_INVALIDATE));
	}
}
EXPORT_SYMBOL(__flush_dcache_range);

/* get_new_mmu_context() uses "cache + 1".  */
DEFINE_SPINLOCK(ctx_alloc_lock);
unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
#define MAX_CTX_NR	(1UL << CTX_NR_BITS)
#define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
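
/* A context value packs a generation "version" in its upper bits and a
 * context number in the low CTX_NR_BITS.  When the bitmap of numbers is
 * exhausted, the version is bumped and the bitmap recycled; that is the
 * new_version case handled in get_new_mmu_context() below.
 */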

/* Caller does TLB context flushing on local CPU if necessary.
 * The caller also ensures that CTX_VALID(mm->context) is false.
 *
 * We must be careful about boundary cases so that we never
 * let the user have CTX 0 (nucleus) or we ever use a CTX
 * version of zero (and thus NO_CONTEXT would not be caught
 * by version mis-match tests in mmu_context.h).
 *
 * Always invoked with interrupts disabled.
 */
void get_new_mmu_context(struct mm_struct *mm)
{
	unsigned long ctx, new_ctx;
	unsigned long orig_pgsz_bits;
	int new_version;

	spin_lock(&ctx_alloc_lock);
	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
	new_version = 0;
	if (new_ctx >= (1 << CTX_NR_BITS)) {
		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
		if (new_ctx >= ctx) {
			int i;
			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
				CTX_FIRST_VERSION;
			if (new_ctx == 1)
				new_ctx = CTX_FIRST_VERSION;

			/* Don't call memset, for 16 entries that's just
			 * plain silly...
			 */
			mmu_context_bmap[0] = 3;
			mmu_context_bmap[1] = 0;
			mmu_context_bmap[2] = 0;
			mmu_context_bmap[3] = 0;
			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
				mmu_context_bmap[i + 0] = 0;
				mmu_context_bmap[i + 1] = 0;
				mmu_context_bmap[i + 2] = 0;
				mmu_context_bmap[i + 3] = 0;
			}
			new_version = 1;
			goto out;
		}
	}
	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
out:
	tlb_context_cache = new_ctx;
	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
	spin_unlock(&ctx_alloc_lock);

	if (unlikely(new_version))
		smp_new_mmu_context_version();
}

static int numa_enabled = 1;
static int numa_debug;

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);

#define numadbg(f, a...) \
do {	if (numa_debug) \
		printk(KERN_INFO f, ## a); \
} while (0)

static void __init find_ramdisk(unsigned long phys_base)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
		unsigned long ramdisk_image;

		/* Older versions of the bootloader only supported a
		 * 32-bit physical address for the ramdisk image
		 * location, stored at sparc_ramdisk_image.  Newer
		 * SILO versions set sparc_ramdisk_image to zero and
		 * provide a full 64-bit physical address at
		 * sparc_ramdisk_image64.
		 */
		ramdisk_image = sparc_ramdisk_image;
		if (!ramdisk_image)
			ramdisk_image = sparc_ramdisk_image64;

		/* Another bootloader quirk.  The bootloader normalizes
		 * the physical address to KERNBASE, so we have to
		 * factor that back out and add in the lowest valid
		 * physical page address to get the true physical address.
		 */
		ramdisk_image -= KERNBASE;
		ramdisk_image += phys_base;

		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
			ramdisk_image, sparc_ramdisk_size);

		initrd_start = ramdisk_image;
		initrd_end = ramdisk_image + sparc_ramdisk_size;

		memblock_reserve(initrd_start, sparc_ramdisk_size);

		initrd_start += PAGE_OFFSET;
		initrd_end += PAGE_OFFSET;
	}
#endif
}

struct node_mem_mask {
	unsigned long mask;
	unsigned long val;
};
static struct node_mem_mask node_masks[MAX_NUMNODES];
static int num_node_masks;

#ifdef CONFIG_NEED_MULTIPLE_NODES

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];

struct mdesc_mblock {
	u64	base;
	u64	size;
	u64	offset; /* RA-to-PA */
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;
static int find_numa_node_for_addr(unsigned long pa,
				   struct node_mem_mask *pnode_mask);

static unsigned long __init ra_to_pa(unsigned long addr)
{
	int i;

	for (i = 0; i < num_mblocks; i++) {
		struct mdesc_mblock *m = &mblocks[i];

		if (addr >= m->base &&
		    addr < (m->base + m->size)) {
			addr += m->offset;
			break;
		}
	}
	return addr;
}
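
/* On sun4v guests the firmware describes memory in "real addresses";
 * each mdesc mblock carries the offset needed to turn an RA within its
 * range into a true physical address, which is all ra_to_pa() does.
 */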

static int __init find_node(unsigned long addr)
{
	static bool search_mdesc = true;
	static struct node_mem_mask last_mem_mask = { ~0UL, ~0UL };
	static int last_index;
	int i;

	addr = ra_to_pa(addr);
	for (i = 0; i < num_node_masks; i++) {
		struct node_mem_mask *p = &node_masks[i];

		if ((addr & p->mask) == p->val)
			return i;
	}
	/* The following condition has been observed on LDOM guests because
	 * node_masks only contains the best latency mask and value.
	 * LDOM guest's mdesc can contain a single latency group to
	 * cover multiple address ranges. Print a warning message only if the
	 * address cannot be found in node_masks nor mdesc.
	 */
	if ((search_mdesc) &&
	    ((addr & last_mem_mask.mask) != last_mem_mask.val)) {
		/* find the available node in the mdesc */
		last_index = find_numa_node_for_addr(addr, &last_mem_mask);
		numadbg("find_node: latency group for address 0x%lx is %d\n",
			addr, last_index);
		if ((last_index < 0) || (last_index >= num_node_masks)) {
			/* WARN_ONCE() and use default group 0 */
			WARN_ONCE(1, "find_node: A physical address doesn't match a NUMA node rule. Some physical memory will be owned by node 0.");
			search_mdesc = false;
			last_index = 0;
		}
	}

	return last_index;
}

static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
	*nid = find_node(start);
	start += PAGE_SIZE;
	while (start < end) {
		int n = find_node(start);

		if (n != *nid)
			break;
		start += PAGE_SIZE;
	}

	if (start > end)
		start = end;

	return start;
}
#endif

/* This must be invoked after performing all of the necessary
 * memblock_set_node() calls for 'nid'.  We need to be able to get
 * correct data from get_pfn_range_for_nid().
 */
static void __init allocate_node_data(int nid)
{
	struct pglist_data *p;
	unsigned long start_pfn, end_pfn;
#ifdef CONFIG_NEED_MULTIPLE_NODES
	unsigned long paddr;

	paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid);
	if (!paddr) {
		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
		prom_halt();
	}
	NODE_DATA(nid) = __va(paddr);
	memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));

	NODE_DATA(nid)->node_id = nid;

	p = NODE_DATA(nid);

	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
	p->node_start_pfn = start_pfn;
	p->node_spanned_pages = end_pfn - start_pfn;
}

static void init_node_masks_nonnuma(void)
{
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int i;
#endif

	numadbg("Initializing tables for non-numa.\n");

	node_masks[0].mask = node_masks[0].val = 0;
	num_node_masks = 1;

#ifdef CONFIG_NEED_MULTIPLE_NODES
	for (i = 0; i < NR_CPUS; i++)
		numa_cpu_lookup_table[i] = 0;

	cpumask_setall(&numa_cpumask_lookup_table[0]);
#endif
}

#ifdef CONFIG_NEED_MULTIPLE_NODES
struct pglist_data *node_data[MAX_NUMNODES];

EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);

struct mdesc_mlgroup {
	u64	node;
	u64	latency;
	u64	match;
	u64	mask;
};
static struct mdesc_mlgroup *mlgroups;
static int num_mlgroups;

static int scan_pio_for_cfg_handle(struct mdesc_handle *md, u64 pio,
				   u32 cfg_handle)
{
	u64 arc;

	mdesc_for_each_arc(arc, md, pio, MDESC_ARC_TYPE_FWD) {
		u64 target = mdesc_arc_target(md, arc);
		const u64 *val;

		val = mdesc_get_property(md, target,
					 "cfg-handle", NULL);
		if (val && *val == cfg_handle)
			return 0;