/*
 *  arch/sparc64/mm/init.c
 *
 *  Copyright (C) 1996-1999 David S. Miller (davem@caip.rutgers.edu)
 *  Copyright (C) 1997-1999 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 */
 
#include <linux/extable.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/initrd.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/kprobes.h>
#include <linux/cache.h>
#include <linux/sort.h>
#include <linux/ioport.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
#include <linux/mmzone.h>
#include <linux/gfp.h>

#include <asm/head.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/iommu.h>
#include <asm/io.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
#include <asm/tsb.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/cpudata.h>
#include <asm/setup.h>
#include <asm/irq.h>

#include "init_64.h"

unsigned long kern_linear_pte_xor[4] __read_mostly;
static unsigned long page_cache4v_flag;

/* A bitmap, two bits for every 256MB of physical memory.  These two
 * bits determine what page size we use for kernel linear
 * translations.  They form an index into kern_linear_pte_xor[].  The
 * value in the indexed slot is XOR'd with the TLB miss virtual
 * address to form the resulting TTE.  The mapping is:
 *
 *	0	==>	4MB
 *	1	==>	256MB
 *	2	==>	2GB
 *	3	==>	16GB
 *
 * All sun4v chips support 256MB pages.  Only SPARC-T4 and later
 * support 2GB pages, and hopefully future cpus will support the 16GB
 * pages as well.  For slots 2 and 3, we encode a 256MB TTE xor there
 * if these larger page sizes are not supported by the cpu.
 *
 * It would be nice to determine this from the machine description
 * 'cpu' properties, but we need to have this table setup before the
 * MDESC is initialized.
 */

#ifndef CONFIG_DEBUG_PAGEALLOC
/* A special kernel TSB for 4MB, 256MB, 2GB and 16GB linear mappings.
 * Space is allocated for this right after the trap table in
 * arch/sparc64/kernel/head.S
 */
extern struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
#endif
extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static unsigned long cpu_pgsz_mask;

#define MAX_BANKS	1024

static struct linux_prom64_registers pavail[MAX_BANKS];
static int pavail_ents;

u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES];

static int cmp_p64(const void *a, const void *b)
{
	const struct linux_prom64_registers *x = a, *y = b;

	if (x->phys_addr > y->phys_addr)
		return 1;
	if (x->phys_addr < y->phys_addr)
		return -1;
	return 0;
}

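/* Read the named property from the OBP /memory node into 'regs',
 * page align each entry, drop entries that become empty, and sort
 * the result by ascending physical address.
 */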
static void __init read_obp_memory(const char *property,
				   struct linux_prom64_registers *regs,
				   int *num_ents)
{
	phandle node = prom_finddevice("/memory");
	int prop_size = prom_getproplen(node, property);
	int ents, ret, i;

	ents = prop_size / sizeof(struct linux_prom64_registers);
	if (ents > MAX_BANKS) {
		prom_printf("The machine has more %s property entries than "
			    "this kernel can support (%d).\n",
			    property, MAX_BANKS);
		prom_halt();
	}

	ret = prom_getproperty(node, property, (char *) regs, prop_size);
	if (ret == -1) {
		prom_printf("Couldn't get %s property from /memory.\n",
				property);
		prom_halt();
	}

	/* Sanitize what we got from the firmware, by page aligning
	 * everything.
	 */
	for (i = 0; i < ents; i++) {
		unsigned long base, size;

		base = regs[i].phys_addr;
		size = regs[i].reg_size;

		size &= PAGE_MASK;
		if (base & ~PAGE_MASK) {
			unsigned long new_base = PAGE_ALIGN(base);

			size -= new_base - base;
			if ((long) size < 0L)
				size = 0UL;
			base = new_base;
		}
		if (size == 0UL) {
			/* If it is empty, simply get rid of it.
			 * This simplifies the logic of the other
			 * functions that process these arrays.
			 */
			memmove(&regs[i], &regs[i + 1],
				(ents - i - 1) * sizeof(regs[0]));
			i--;
			ents--;
			continue;
		}
		regs[i].phys_addr = base;
		regs[i].reg_size = size;
	}

	*num_ents = ents;

	sort(regs, ents, sizeof(struct linux_prom64_registers),
	     cmp_p64, NULL);
}

/* Kernel physical address base and size in bytes.  */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;

/* Initial ramdisk setup */
extern unsigned long sparc_ramdisk_image64;
extern unsigned int sparc_ramdisk_image;
extern unsigned int sparc_ramdisk_size;

struct page *mem_map_zero __read_mostly;
EXPORT_SYMBOL(mem_map_zero);

unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;

unsigned long sparc64_kern_pri_context __read_mostly;
unsigned long sparc64_kern_pri_nuc_bits __read_mostly;
unsigned long sparc64_kern_sec_context __read_mostly;

int num_kernel_image_mappings;

#ifdef CONFIG_DEBUG_DCFLUSH
atomic_t dcpage_flushes = ATOMIC_INIT(0);
#ifdef CONFIG_SMP
atomic_t dcpage_flushes_xcall = ATOMIC_INIT(0);
#endif
#endif

inline void flush_dcache_page_impl(struct page *page)
{
	BUG_ON(tlb_type == hypervisor);
#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

#define PG_dcache_dirty		PG_arch_1
#define PG_dcache_cpu_shift	32UL
#define PG_dcache_cpu_mask	\
	((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)

#define dcache_dirty_cpu(page) \
	(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)

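/* Atomically record, with a casx loop on page->flags, which cpu
 * owns the dirty D-cache lines for this page.
 */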
static inline void set_dcache_dirty(struct page *page, int this_cpu)
{
	unsigned long mask = this_cpu;
	unsigned long non_cpu_bits;

	non_cpu_bits = ~(PG_dcache_cpu_mask << PG_dcache_cpu_shift);
	mask = (mask << PG_dcache_cpu_shift) | (1UL << PG_dcache_dirty);

	__asm__ __volatile__("1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "and	%%g7, %1, %%g1\n\t"
			     "or	%%g1, %0, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop"
			     : /* no outputs */
			     : "r" (mask), "r" (non_cpu_bits), "r" (&page->flags)
			     : "g1", "g7");
}

static inline void clear_dcache_dirty_cpu(struct page *page, unsigned long cpu)
{
	unsigned long mask = (1UL << PG_dcache_dirty);

	__asm__ __volatile__("! test_and_clear_dcache_dirty\n"
			     "1:\n\t"
			     "ldx	[%2], %%g7\n\t"
			     "srlx	%%g7, %4, %%g1\n\t"
			     "and	%%g1, %3, %%g1\n\t"
			     "cmp	%%g1, %0\n\t"
			     "bne,pn	%%icc, 2f\n\t"
			     " andn	%%g7, %1, %%g1\n\t"
			     "casx	[%2], %%g7, %%g1\n\t"
			     "cmp	%%g7, %%g1\n\t"
			     "bne,pn	%%xcc, 1b\n\t"
			     " nop\n"
			     "2:"
			     : /* no outputs */
			     : "r" (cpu), "r" (mask), "r" (&page->flags),
			       "i" (PG_dcache_cpu_mask),
			       "i" (PG_dcache_cpu_shift)
			     : "g1", "g7");
}

static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
{
	unsigned long tsb_addr = (unsigned long) ent;

	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
		tsb_addr = __pa(tsb_addr);

	__tsb_insert(tsb_addr, tag, pte);
}

unsigned long _PAGE_ALL_SZ_BITS __read_mostly;

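/* If the page at 'pfn' is marked D-cache dirty, flush it on the
 * owning cpu (cross-calling when that is not the local cpu) and
 * then clear the dirty marker.
 */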
static void flush_dcache(unsigned long pfn)
{
	struct page *page;

	page = pfn_to_page(pfn);
	if (page) {
		unsigned long pg_flags;

		pg_flags = page->flags;
		if (pg_flags & (1UL << PG_dcache_dirty)) {
			int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
				   PG_dcache_cpu_mask);
			int this_cpu = get_cpu();

			/* This is just to optimize away some function calls
			 * in the SMP case.
			 */
			if (cpu == this_cpu)
				flush_dcache_page_impl(page);
			else
				smp_flush_dcache_page_impl(page, cpu);

			clear_dcache_dirty_cpu(page, cpu);

			put_cpu();
		}
	}
}

/* mm->context.lock must be held */
static void __update_mmu_tsb_insert(struct mm_struct *mm, unsigned long tsb_index,
				    unsigned long tsb_hash_shift, unsigned long address,
				    unsigned long tte)
{
	struct tsb *tsb = mm->context.tsb_block[tsb_index].tsb;
	unsigned long tag;

	if (unlikely(!tsb))
		return;

	tsb += ((address >> tsb_hash_shift) &
		(mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
	tag = (address >> 22UL);
	tsb_insert(tsb, tag, tte);
}

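/* Handle the "hugepagesz=" command line option, registering the
 * requested huge page size only if the cpu advertises it in
 * cpu_pgsz_mask.
 */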
#ifdef CONFIG_HUGETLB_PAGE
static int __init setup_hugepagesz(char *string)
{
	unsigned long long hugepage_size;
	unsigned int hugepage_shift;
	unsigned short hv_pgsz_idx;
	unsigned int hv_pgsz_mask;
	int rc = 0;

	hugepage_size = memparse(string, &string);
	hugepage_shift = ilog2(hugepage_size);

	switch (hugepage_shift) {
	case HPAGE_2GB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
		break;
	case HPAGE_256MB_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_256MB;
		hv_pgsz_idx = HV_PGSZ_IDX_256MB;
		break;
	case HPAGE_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_4MB;
		hv_pgsz_idx = HV_PGSZ_IDX_4MB;
		break;
	case HPAGE_64K_SHIFT:
		hv_pgsz_mask = HV_PGSZ_MASK_64K;
		hv_pgsz_idx = HV_PGSZ_IDX_64K;
		break;
	default:
		hv_pgsz_mask = 0;
	}

	if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
		hugetlb_bad_size();
		pr_err("hugepagesz=%llu not supported by MMU.\n",
			hugepage_size);
		goto out;
	}

	hugetlb_add_hstate(hugepage_shift - PAGE_SHIFT);
	rc = 1;

out:
	return rc;
}
__setup("hugepagesz=", setup_hugepagesz);
#endif	/* CONFIG_HUGETLB_PAGE */

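/* Called after a PTE update: flush any dirty D-cache state for the
 * new page on non-hypervisor (sun4u class) chips and pre-load the
 * translation into the appropriate TSB.
 */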
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
{
	struct mm_struct *mm;
	unsigned long flags;
	pte_t pte = *ptep;

	if (tlb_type != hypervisor) {
		unsigned long pfn = pte_pfn(pte);

		if (pfn_valid(pfn))
			flush_dcache(pfn);
	}

	mm = vma->vm_mm;

	/* Don't insert a non-valid PTE into the TSB, we'll deadlock.  */
	if (!pte_accessible(mm, pte))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
	if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
	    is_hugetlb_pmd(__pmd(pte_val(pte)))) {
		/* We are fabricating 8MB pages using 4MB real hw pages.  */
		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
					address, pte_val(pte));
	} else
#endif
		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
					address, pte_val(pte));

	spin_unlock_irqrestore(&mm->context.lock, flags);
}

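/* Architecture entry point for D-cache flushing.  This is a nop on
 * sun4v; for page cache pages with no user mappings the flush is
 * deferred by tagging the page dirty for this cpu instead.
 */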
void flush_dcache_page(struct page *page)
{
	struct address_space *mapping;
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

	/* Do not bother with the expensive D-cache flush if it
	 * is merely the zero page.  The 'bigcore' testcase in GDB
	 * causes this case to run millions of times.
	 */
	if (page == ZERO_PAGE(0))
		return;

	this_cpu = get_cpu();

	mapping = page_mapping(page);
	if (mapping && !mapping_mapped(mapping)) {
		int dirty = test_bit(PG_dcache_dirty, &page->flags);
		if (dirty) {
			int dirty_cpu = dcache_dirty_cpu(page);

			if (dirty_cpu == this_cpu)
				goto out;
			smp_flush_dcache_page_impl(page, dirty_cpu);
		}
		set_dcache_dirty(page, this_cpu);
	} else {
		/* We could delay the flush for the !page_mapping
		 * case too.  But that case is for exec env/arg
		 * pages and those are %99 certainly going to get
		 * faulted into the tlb (and thus flushed) anyways.
		 */
		flush_dcache_page_impl(page);
	}

out:
	put_cpu();
}
EXPORT_SYMBOL(flush_dcache_page);

void __kprobes flush_icache_range(unsigned long start, unsigned long end)
{
	/* Cheetah and Hypervisor platform cpus have coherent I-cache. */
	if (tlb_type == spitfire) {
		unsigned long kaddr;

		/* This code only runs on Spitfire cpus so this is
		 * why we can assume _PAGE_PADDR_4U.
		 */
		for (kaddr = start; kaddr < end; kaddr += PAGE_SIZE) {
			unsigned long paddr, mask = _PAGE_PADDR_4U;

			if (kaddr >= PAGE_OFFSET)
				paddr = kaddr & mask;
			else {
				pgd_t *pgdp = pgd_offset_k(kaddr);
				pud_t *pudp = pud_offset(pgdp, kaddr);
				pmd_t *pmdp = pmd_offset(pudp, kaddr);
				pte_t *ptep = pte_offset_kernel(pmdp, kaddr);

				paddr = pte_val(*ptep) & mask;
			}
			__flush_icache_page(paddr);
		}
	}
}
EXPORT_SYMBOL(flush_icache_range);

void mmu_info(struct seq_file *m)
{
	static const char *pgsz_strings[] = {
		"8K", "64K", "512K", "4MB", "32MB",
		"256MB", "2GB", "16GB",
	};
	int i, printed;

	if (tlb_type == cheetah)
		seq_printf(m, "MMU Type\t: Cheetah\n");
	else if (tlb_type == cheetah_plus)
		seq_printf(m, "MMU Type\t: Cheetah+\n");
	else if (tlb_type == spitfire)
		seq_printf(m, "MMU Type\t: Spitfire\n");
	else if (tlb_type == hypervisor)
		seq_printf(m, "MMU Type\t: Hypervisor (sun4v)\n");
	else
		seq_printf(m, "MMU Type\t: ???\n");

	seq_printf(m, "MMU PGSZs\t: ");
	printed = 0;
	for (i = 0; i < ARRAY_SIZE(pgsz_strings); i++) {
		if (cpu_pgsz_mask & (1UL << i)) {
			seq_printf(m, "%s%s",
				   printed ? "," : "", pgsz_strings[i]);
			printed++;
		}
	}
	seq_putc(m, '\n');

#ifdef CONFIG_DEBUG_DCFLUSH
	seq_printf(m, "DCPageFlushes\t: %d\n",
		   atomic_read(&dcpage_flushes));
#ifdef CONFIG_SMP
	seq_printf(m, "DCPageFlushesXC\t: %d\n",
		   atomic_read(&dcpage_flushes_xcall));
#endif /* CONFIG_SMP */
#endif /* CONFIG_DEBUG_DCFLUSH */
}

struct linux_prom_translation prom_trans[512] __read_mostly;
unsigned int prom_trans_ents __read_mostly;

unsigned long kern_locked_tte_data;

/* The obp translations are saved based on 8k pagesize, since obp can
 * use a mixture of pagesizes. Misses to the LOW_OBP_ADDRESS ->
 * HI_OBP_ADDRESS range are handled in ktlb.S.
 */
static inline int in_obp_range(unsigned long vaddr)
{
	return (vaddr >= LOW_OBP_ADDRESS &&
		vaddr < HI_OBP_ADDRESS);
}

static int cmp_ptrans(const void *a, const void *b)
{
	const struct linux_prom_translation *x = a, *y = b;

	if (x->virt > y->virt)
		return 1;
	if (x->virt < y->virt)
		return -1;
	return 0;
}

/* Read OBP translations property into 'prom_trans[]'.  */
static void __init read_obp_translations(void)
{
	int n, node, ents, first, last, i;

	node = prom_finddevice("/virtual-memory");
	n = prom_getproplen(node, "translations");
	if (unlikely(n == 0 || n == -1)) {
		prom_printf("prom_mappings: Couldn't get size.\n");
		prom_halt();
	}
	if (unlikely(n > sizeof(prom_trans))) {
		prom_printf("prom_mappings: Size %d is too big.\n", n);
		prom_halt();
	}

	if ((n = prom_getproperty(node, "translations",
				  (char *)&prom_trans[0],
				  sizeof(prom_trans))) == -1) {
		prom_printf("prom_mappings: Couldn't get property.\n");
		prom_halt();
	}

	n = n / sizeof(struct linux_prom_translation);

	ents = n;

	sort(prom_trans, ents, sizeof(struct linux_prom_translation),
	     cmp_ptrans, NULL);

	/* Now kick out all the non-OBP entries.  */
	for (i = 0; i < ents; i++) {
		if (in_obp_range(prom_trans[i].virt))
			break;
	}
	first = i;
	for (; i < ents; i++) {
		if (!in_obp_range(prom_trans[i].virt))
			break;
	}
	last = i;

	for (i = 0; i < (last - first); i++) {
		struct linux_prom_translation *src = &prom_trans[i + first];
		struct linux_prom_translation *dest = &prom_trans[i];

		*dest = *src;
	}
	for (; i < ents; i++) {
		struct linux_prom_translation *dest = &prom_trans[i];
		dest->virt = dest->size = dest->data = 0x0UL;
	}

	prom_trans_ents = last - first;

	if (tlb_type == spitfire) {
		/* Clear diag TTE bits. */
		for (i = 0; i < prom_trans_ents; i++)
			prom_trans[i].data &= ~0x0003fe0000000000UL;
	}

	/* Force execute bit on.  */
	for (i = 0; i < prom_trans_ents; i++)
		prom_trans[i].data |= (tlb_type == hypervisor ?
				       _PAGE_EXEC_4V : _PAGE_EXEC_4U);
}

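/* Ask the sun4v hypervisor to install a permanent mapping for
 * 'vaddr' in the given MMU (HV_MMU_DMMU or HV_MMU_IMMU), halting
 * via the PROM if the call fails.
 */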
static void __init hypervisor_tlb_lock(unsigned long vaddr,
				       unsigned long pte,
				       unsigned long mmu)
{
	unsigned long ret = sun4v_mmu_map_perm_addr(vaddr, 0, pte, mmu);

	if (ret != 0) {
		prom_printf("hypervisor_tlb_lock[%lx:%x:%lx:%lx]: "
			    "errors with %lx\n", vaddr, 0, pte, mmu, ret);
		prom_halt();
	}
}

static unsigned long kern_large_tte(unsigned long paddr);

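/* Lock the kernel image into the TLB, one 4MB mapping at a time,
 * using hypervisor calls on sun4v and OBP dTLB/iTLB loads on
 * sun4u style chips.
 */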
static void __init remap_kernel(void)
{
	unsigned long phys_page, tte_vaddr, tte_data;
	int i, tlb_ent = sparc64_highest_locked_tlbent();

	tte_vaddr = (unsigned long) KERNBASE;
	phys_page = (prom_boot_mapping_phys_low >> ILOG2_4MB) << ILOG2_4MB;
	tte_data = kern_large_tte(phys_page);

	kern_locked_tte_data = tte_data;

	/* Now lock us into the TLBs via Hypervisor or OBP. */
	if (tlb_type == hypervisor) {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_DMMU);
			hypervisor_tlb_lock(tte_vaddr, tte_data, HV_MMU_IMMU);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
	} else {
		for (i = 0; i < num_kernel_image_mappings; i++) {
			prom_dtlb_load(tlb_ent - i, tte_data, tte_vaddr);
			prom_itlb_load(tlb_ent - i, tte_data, tte_vaddr);
			tte_vaddr += 0x400000;
			tte_data += 0x400000;
		}
		sparc64_highest_unlocked_tlb_ent = tlb_ent - i;
	}
	if (tlb_type == cheetah_plus) {
		sparc64_kern_pri_context = (CTX_CHEETAH_PLUS_CTX0 |
					    CTX_CHEETAH_PLUS_NUC);
		sparc64_kern_pri_nuc_bits = CTX_CHEETAH_PLUS_NUC;
		sparc64_kern_sec_context = CTX_CHEETAH_PLUS_CTX0;
	}
}


static void __init inherit_prom_mappings(void)
{
	/* Now fixup OBP's idea about where we really are mapped. */
	printk("Remapping the kernel... ");
	remap_kernel();
	printk("done.\n");
}

void prom_world(int enter)
{
	if (!enter)
		set_fs(get_fs());

	__asm__ __volatile__("flushw");
}

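/* Flush the D-cache over a virtual address range, either by
 * displacing tags (spitfire) or with ASI_DCACHE_INVALIDATE stores
 * keyed by physical address (cheetah and cheetah+).
 */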
void __flush_dcache_range(unsigned long start, unsigned long end)
{
	unsigned long va;

	if (tlb_type == spitfire) {
		int n = 0;

		for (va = start; va < end; va += 32) {
			spitfire_put_dcache_tag(va & 0x3fe0, 0x0);
			if (++n >= 512)
				break;
		}
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
		start = __pa(start);
		end = __pa(end);
		for (va = start; va < end; va += 32)
			__asm__ __volatile__("stxa %%g0, [%0] %1\n\t"
					     "membar #Sync"
					     : /* no outputs */
					     : "r" (va),
					       "i" (ASI_DCACHE_INVALIDATE));
	}
}
EXPORT_SYMBOL(__flush_dcache_range);

/* get_new_mmu_context() uses "cache + 1".  */
DEFINE_SPINLOCK(ctx_alloc_lock);
unsigned long tlb_context_cache = CTX_FIRST_VERSION;
#define MAX_CTX_NR	(1UL << CTX_NR_BITS)
#define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};

/* Caller does TLB context flushing on local CPU if necessary.
 * The caller also ensures that CTX_VALID(mm->context) is false.
 *
 * We must be careful about boundary cases so that we never
 * let the user have CTX 0 (nucleus) or we ever use a CTX
 * version of zero (and thus NO_CONTEXT would not be caught
 * by version mis-match tests in mmu_context.h).
 *
 * Always invoked with interrupts disabled.
 */
void get_new_mmu_context(struct mm_struct *mm)
{
	unsigned long ctx, new_ctx;
	unsigned long orig_pgsz_bits;
	int new_version;

	spin_lock(&ctx_alloc_lock);
	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
	new_version = 0;
	if (new_ctx >= (1 << CTX_NR_BITS)) {
		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
		if (new_ctx >= ctx) {
			int i;
			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
				CTX_FIRST_VERSION + 1;
			if (new_ctx == 1)
				new_ctx = CTX_FIRST_VERSION + 1;

			/* Don't call memset, for 16 entries that's just
			 * plain silly...
			 */
			mmu_context_bmap[0] = 3;
			mmu_context_bmap[1] = 0;
			mmu_context_bmap[2] = 0;
			mmu_context_bmap[3] = 0;
			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
				mmu_context_bmap[i + 0] = 0;
				mmu_context_bmap[i + 1] = 0;
				mmu_context_bmap[i + 2] = 0;
				mmu_context_bmap[i + 3] = 0;
			}
			new_version = 1;
			goto out;
		}
	}
	if (mm->context.sparc64_ctx_val)
		cpumask_clear(mm_cpumask(mm));
	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
out:
	tlb_context_cache = new_ctx;
	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
	spin_unlock(&ctx_alloc_lock);

	if (unlikely(new_version))
		smp_new_mmu_context_version();
}

static int numa_enabled = 1;
static int numa_debug;

static int __init early_numa(char *p)
{
	if (!p)
		return 0;

	if (strstr(p, "off"))
		numa_enabled = 0;

	if (strstr(p, "debug"))
		numa_debug = 1;

	return 0;
}
early_param("numa", early_numa);

#define numadbg(f, a...) \
do {	if (numa_debug) \
		printk(KERN_INFO f, ## a); \
} while (0)

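/* Locate the initrd passed in by the boot loader, reserve it in
 * memblock, and record its virtual start and end addresses.
 */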
static void __init find_ramdisk(unsigned long phys_base)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (sparc_ramdisk_image || sparc_ramdisk_image64) {
		unsigned long ramdisk_image;

		/* Older versions of the bootloader only supported a
		 * 32-bit physical address for the ramdisk image
		 * location, stored at sparc_ramdisk_image.  Newer
		 * SILO versions set sparc_ramdisk_image to zero and
		 * provide a full 64-bit physical address at
		 * sparc_ramdisk_image64.
		 */
		ramdisk_image = sparc_ramdisk_image;
		if (!ramdisk_image)
			ramdisk_image = sparc_ramdisk_image64;

		/* Another bootloader quirk.  The bootloader normalizes
		 * the physical address to KERNBASE, so we have to
		 * factor that back out and add in the lowest valid
		 * physical page address to get the true physical address.
		 */
		ramdisk_image -= KERNBASE;
		ramdisk_image += phys_base;

		numadbg("Found ramdisk at physical address 0x%lx, size %u\n",
			ramdisk_image, sparc_ramdisk_size);

		initrd_start = ramdisk_image;
		initrd_end = ramdisk_image + sparc_ramdisk_size;

		memblock_reserve(initrd_start, sparc_ramdisk_size);

		initrd_start += PAGE_OFFSET;
		initrd_end += PAGE_OFFSET;
	}
#endif
}

struct node_mem_mask {
	unsigned long mask;
	unsigned long match;
};
static struct node_mem_mask node_masks[MAX_NUMNODES];
static int num_node_masks;

#ifdef CONFIG_NEED_MULTIPLE_NODES

struct mdesc_mlgroup {
	u64	node;
	u64	latency;
	u64	match;
	u64	mask;
};

static struct mdesc_mlgroup *mlgroups;
static int num_mlgroups;

int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];

struct mdesc_mblock {
	u64	base;
	u64	size;
	u64	offset; /* RA-to-PA */
};
static struct mdesc_mblock *mblocks;
static int num_mblocks;

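/* Walk the machine description mblocks looking for the one whose
 * [base, base + size) range contains 'addr'.
 */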
static struct mdesc_mblock * __init addr_to_mblock(unsigned long addr)
{
	struct mdesc_mblock *m = NULL;
	int i;

	for (i = 0; i < num_mblocks; i++) {
		m = &mblocks[i];

		if (addr >= m->base &&
		    addr < (m->base + m->size)) {
			break;
		}
	}

	return m;
}

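/* Scan [start, end) page by page and return the address at which the
 * owning NUMA node (per node_masks matching) changes; '*nid' is set
 * to the node owning the pages scanned so far.
 */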
static u64 __init memblock_nid_range_sun4u(u64 start, u64 end, int *nid)
{
	int prev_nid, new_nid;

	prev_nid = -1;
	for ( ; start < end; start += PAGE_SIZE) {
		for (new_nid = 0; new_nid < num_node_masks; new_nid++) {
			struct node_mem_mask *p = &node_masks[new_nid];

			if ((start & p->mask) == p->match) {
				if (prev_nid == -1)
					prev_nid = new_nid;
				break;
			}
		}

		if (new_nid == num_node_masks) {
			prev_nid = 0;
			WARN_ONCE(1, "addr[%Lx] doesn't match a NUMA node rule. Some memory will be owned by node 0.",
				  start);
			break;
		}

		if (prev_nid != new_nid)
			break;
	}
	*nid = prev_nid;

	return start > end ? end : start;
}

static u64 __init memblock_nid_range(u64 start, u64 end, int *nid)
{
	u64 ret_end, pa_start, m_mask, m_match, m_end;
	struct mdesc_mblock *mblock;
	int _nid, i;

	if (tlb_type != hypervisor)
		return memblock_nid_range_sun4u(start, end, nid);

	mblock = addr_to_mblock(start);
	if (!mblock) {
		WARN_ONCE(1, "memblock_nid_range: Can't find mblock addr[%Lx]",
			  start);

		_nid = 0;
		ret_end = end;
		goto done;
	}

	pa_start = start + mblock->offset;
	m_match = 0;
	m_mask = 0;

	for (_nid = 0; _nid < num_node_masks; _nid++) {
		struct node_mem_mask *const m = &node_masks[_nid];

		if ((pa_start & m->mask) == m->match) {
			m_match = m->match;
			m_mask = m->mask;
			break;
		}
	}

	if (num_node_masks == _nid) {
		/* We could not find NUMA group, so default to 0, but lets
		 * search for latency group, so we could calculate the correct
		 * end address that we return
		 */
		_nid = 0;

		for (i = 0; i < num_mlgroups; i++) {
			struct mdesc_mlgroup *const m = &mlgroups[i];

			if ((pa_start & m->mask) == m->match) {
				m_match = m->match;
				m_mask = m->mask;
				break;
			}
		}

		if (i == num_mlgroups) {
			WARN_ONCE(1, "memblock_nid_range: Can't find latency group addr[%Lx]",
				  start);

			ret_end = end;
			goto done;
		}
	}

	/*
	 * Each latency group has match and mask, and each memory block has an
	 * offset.  An address belongs to a latency group if its address matches
	 * the following formula: ((addr + offset) & mask) == match
	 * It is, however, slow to check every single page if it matches a
	 * particular latency group. As optimization we calculate end value by
	 * using bit arithmetics.
	 */
	m_end = m_match + (1ul << __ffs(m_mask)) - mblock->offset;
	m_end += pa_start & ~((1ul << fls64(m_mask)) - 1);
	ret_end = m_end > end ? end : m_end;

done:
	*nid = _nid;
	return ret_end;
}
#endif

/* This must be invoked after performing all of the necessary
 * memblock_set_node() calls for 'nid'.  We need to be able to get
 * correct data from get_pfn_range_for_nid().
 */
static void __init allocate_node_data(int nid)
{
	struct pglist_data *p;
	unsigned long start_pfn, end_pfn;