// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  PowerPC version
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 */

#undef DEBUG

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
#include <linux/module.h>
#include <linux/poison.h>
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/memremap.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <linux/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/eeh.h>
#include <asm/processor.h>
#include <asm/mmzone.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/vdso.h>

#include <mm/mmu_decl.h>

phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL_GPL(kernstart_addr);

#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Given an address within the vmemmap, determine the pfn of the page that
 * represents the start of the section it is within.  Note that we have to
 * do this by hand as the proffered address may not be correctly aligned.
 * Subtraction of non-aligned pointers produces undefined results.
 */
static unsigned long __meminit vmemmap_section_start(unsigned long page)
{
	unsigned long offset = page - ((unsigned long)(vmemmap));

	/* Return the pfn of the start of the section. */
	return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
}

/*
 * Check if this vmemmap page is already initialised.  If any section
 * which overlaps this vmemmap page is initialised then this page is
 * initialised already.
 */
static int __meminit vmemmap_populated(unsigned long start, int page_size)
{
	unsigned long end = start + page_size;
	start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));

	for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
		if (pfn_valid(page_to_pfn((struct page *)start)))
			return 1;

	return 0;
}

/*
 * vmemmap virtual address space management does not have a traditional page
 * table to track which virtual struct pages are backed by a physical mapping.
 * The virtual to physical mappings are tracked in a simple linked list
 * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
 * all times, whereas the 'next' list maintains the available
 * vmemmap_backing structures which have been deleted from the
 * 'vmemmap_list' during system runtime (memory hotplug remove
 * operation). The freed 'vmemmap_backing' structures are reused later when
 * new requests come in without allocating fresh memory. This pointer also
 * tracks the allocated 'vmemmap_backing' structures as we allocate one
 * full page of memory at a time when we don't have any.
 */
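/*
 * struct vmemmap_backing itself is declared in the powerpc pgalloc headers;
 * the fields used here are 'list' (chaining pointer), 'phys' (backing
 * physical address) and 'virt_addr' (the vmemmap virtual address being
 * backed).
 */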
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;

/*
 * The same pointer 'next' tracks individual chunks inside the allocated
 * full page during boot time and again tracks the freed nodes during
 * runtime. This is racy but safe because the two uses are separated by the
 * boot process. It would be a problem if we somehow had a memory hotplug
 * operation during boot!
 */
static int num_left;
static int num_freed;

static __meminit struct vmemmap_backing *vmemmap_list_alloc(int node)
{
	struct vmemmap_backing *vmem_back;
	/* get from freed entries first */
	if (num_freed) {
		num_freed--;
		vmem_back = next;
		next = next->list;

		return vmem_back;
	}

	/* allocate a page when required and hand out chunks */
	if (!num_left) {
		next = vmemmap_alloc_block(PAGE_SIZE, node);
		if (unlikely(!next)) {
			WARN_ON(1);
			return NULL;
		}
		num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
	}

	num_left--;

	return next++;
}

static __meminit void vmemmap_list_populate(unsigned long phys,
					    unsigned long start,
					    int node)
{
	struct vmemmap_backing *vmem_back;

	vmem_back = vmemmap_list_alloc(node);
	if (unlikely(!vmem_back)) {
		WARN_ON(1);
		return;
	}

	vmem_back->phys = phys;
	vmem_back->virt_addr = start;
	vmem_back->list = vmemmap_list;

	vmemmap_list = vmem_back;
}

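/*
 * Check whether the vmemmap chunk that begins at 'start' would fall outside
 * the pfn range covered by the device altmap; if so it cannot be backed by
 * altmap pages and must come from regular memory instead.
 */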
static bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
				unsigned long page_size)
{
	unsigned long nr_pfn = page_size / sizeof(struct page);
	unsigned long start_pfn = page_to_pfn((struct page *)start);

	if ((start_pfn + nr_pfn) > altmap->end_pfn)
		return true;

	if (start_pfn < altmap->base_pfn)
		return true;

	return false;
}

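/*
 * Back the vmemmap virtual range [start, end) with physical memory,
 * preferring the device altmap when one is supplied and falling back to
 * node-local memory otherwise.
 */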
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
		struct vmem_altmap *altmap)
{
	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;

	/* Align to the page size of the linear mapping. */
	start = _ALIGN_DOWN(start, page_size);

	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);

	for (; start < end; start += page_size) {
		void *p = NULL;
		int rc;

		if (vmemmap_populated(start, page_size))
			continue;

		/*
		 * Allocate from the altmap first if we have one. This may
		 * fail due to alignment issues when using 16MB hugepages, so
		 * fall back to system memory if the altmap allocation fails.
		 */
		if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
			p = altmap_alloc_block_buf(page_size, altmap);
			if (!p)
				pr_debug("altmap block allocation failed, falling back to system memory");
		}
		if (!p)
			p = vmemmap_alloc_block_buf(page_size, node);
		if (!p)
			return -ENOMEM;

		vmemmap_list_populate(__pa(p), start, node);

		pr_debug("      * %016lx..%016lx allocated at %p\n",
			 start, start + page_size, p);

		rc = vmemmap_create_mapping(start, page_size, __pa(p));
		if (rc < 0) {
			pr_warn("%s: Unable to create vmemmap mapping: %d\n",
				__func__, rc);
			return -EFAULT;
		}
	}

	return 0;
}

#ifdef CONFIG_MEMORY_HOTPLUG
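/*
 * Remove the vmemmap_list entry for the chunk mapped at 'start' and return
 * the physical address that backed it, putting the entry on the free list
 * for later reuse. Returns 0 if no matching entry is found.
 */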
static unsigned long vmemmap_list_free(unsigned long start)
{
	struct vmemmap_backing *vmem_back, *vmem_back_prev;

	vmem_back_prev = vmem_back = vmemmap_list;

	/* look for it with prev pointer recorded */
	for (; vmem_back; vmem_back = vmem_back->list) {
		if (vmem_back->virt_addr == start)
			break;
		vmem_back_prev = vmem_back;
	}

	if (unlikely(!vmem_back)) {
		WARN_ON(1);
		return 0;
	}

	/* remove it from vmemmap_list */
	if (vmem_back == vmemmap_list) /* remove head */
		vmemmap_list = vmem_back->list;
	else
		vmem_back_prev->list = vmem_back->list;

	/* make 'next' point to this freed entry */
	vmem_back->list = next;
	next = vmem_back;
	num_freed++;

	return vmem_back->phys;
}

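/*
 * Tear down the vmemmap for the virtual range [start, end), returning the
 * backing pages to the altmap, the page allocator or the reserved pool they
 * originally came from.
 */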
void __ref vmemmap_free(unsigned long start, unsigned long end,
		struct vmem_altmap *altmap)
{
	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
	unsigned long page_order = get_order(page_size);
	unsigned long alt_start = ~0, alt_end = ~0;
	unsigned long base_pfn;

	start = _ALIGN_DOWN(start, page_size);
	if (altmap) {
		alt_start = altmap->base_pfn;
		alt_end = altmap->base_pfn + altmap->reserve +
			  altmap->free + altmap->alloc + altmap->align;
	}

	pr_debug("vmemmap_free %lx...%lx\n", start, end);

	for (; start < end; start += page_size) {
		unsigned long nr_pages, addr;
		struct page *page;

		/*
		 * The section has already been marked as invalid, so if
		 * vmemmap_populated() returns true some other section still
		 * uses this page; skip it.
		 */
		if (vmemmap_populated(start, page_size))
			continue;

		addr = vmemmap_list_free(start);
		if (!addr)
			continue;

		page = pfn_to_page(addr >> PAGE_SHIFT);
		nr_pages = 1 << page_order;
		base_pfn = PHYS_PFN(addr);

		if (base_pfn >= alt_start && base_pfn < alt_end) {
			vmem_altmap_free(altmap, nr_pages);
		} else if (PageReserved(page)) {
			/* allocated from bootmem */
			if (page_size < PAGE_SIZE) {
				/*
				 * this shouldn't happen, but if it is
				 * the case, leave the memory there
				 */
				WARN_ON_ONCE(1);
			} else {
				while (nr_pages--)
					free_reserved_page(page++);
			}
		} else {
			free_pages((unsigned long)(__va(addr)), page_order);
		}

		vmemmap_remove_mapping(start, page_size);
	}
}
#endif
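/*
 * Nothing to do here; this hook only exists because the generic sparse
 * vmemmap / memory hotplug code expects every architecture to provide it.
 */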
void register_page_bootmem_memmap(unsigned long section_nr,
				  struct page *start_page, unsigned long size)
{
}

#endif /* CONFIG_SPARSEMEM_VMEMMAP */

#ifdef CONFIG_PPC_BOOK3S_64
static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);

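/*
 * "disable_radix" on the kernel command line (optionally with a boolean
 * argument) forces the fallback to the hash MMU even on radix-capable
 * hardware.
 */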
static int __init parse_disable_radix(char *p)
{
	bool val;

	if (!p)
		val = true;
	else if (kstrtobool(p, &val))
		return -EINVAL;

	disable_radix = val;

	return 0;
}
early_param("disable_radix", parse_disable_radix);

/*
 * If we're running under a hypervisor, we need to check the contents of
 * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
 * radix.  If not, we clear the radix feature bit so we fall back to hash.
 */
static void __init early_check_vec5(void)
{
	unsigned long root, chosen;
	int size;
	const u8 *vec5;
	u8 mmu_supported;

	root = of_get_flat_dt_root();
	chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
	if (chosen == -FDT_ERR_NOTFOUND) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}
	vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
	if (!vec5) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}
	if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
		return;
	}

	/* Check for supported configuration */
	mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
			OV5_FEAT(OV5_MMU_SUPPORT);
	if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
		/* Hypervisor only supports radix - check enabled && GTSE */
		if (!early_radix_enabled()) {
			pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
		}
		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
						OV5_FEAT(OV5_RADIX_GTSE))) {
			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
		}
		/* Do radix anyway - the hypervisor said we had to */
		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
		/* Hypervisor only supports hash - disable radix */
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
	}
}

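/*
 * Choose between the hash and radix MMU at early boot, taking into account
 * the disable_radix command line option and, when running as a guest, the
 * hypervisor's ibm,architecture-vec-5 support vector.
 */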
void __init mmu_early_init_devtree(void)
{
	/* Disable radix mode based on kernel command line. */
	if (disable_radix)
		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;

	/*
	 * Check /chosen/ibm,architecture-vec-5 if running as a guest.
	 * When running bare-metal, we can use radix if we like
	 * even though the ibm,architecture-vec-5 property created by
	 * skiboot doesn't have the necessary bits set.
	 */
	if (!(mfmsr() & MSR_HV))
		early_check_vec5();

	if (early_radix_enabled())
		radix__early_init_devtree();
	else
		hash__early_init_devtree();
}
#endif /* CONFIG_PPC_BOOK3S_64 */