memblock.c 50.7 KB
Newer Older
Yinghai Lu's avatar
Yinghai Lu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
14
#include <linux/slab.h>
Yinghai Lu's avatar
Yinghai Lu committed
15
16
#include <linux/init.h>
#include <linux/bitops.h>
17
#include <linux/poison.h>
18
#include <linux/pfn.h>
19
#include <linux/debugfs.h>
20
#include <linux/kmemleak.h>
21
#include <linux/seq_file.h>
Yinghai Lu's avatar
Yinghai Lu committed
22
#include <linux/memblock.h>
23
#include <linux/bootmem.h>
Yinghai Lu's avatar
Yinghai Lu committed
24

25
#include <asm/sections.h>
26
27
28
#include <linux/io.h>

#include "internal.h"
29

Tejun Heo's avatar
Tejun Heo committed
30
31
/*
 * Statically sized region arrays that back memblock.memory and
 * memblock.reserved until (and unless) the arrays are resized.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
32
33
34
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
/* Static region array that backs memblock.physmem. */
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
#endif
Tejun Heo's avatar
Tejun Heo committed
35
36
37
38
39

/*
 * The global memblock state.  Every region type starts out pointing at its
 * static initial array with a single empty dummy entry.
 */
struct memblock memblock __initdata_memblock = {
	.memory = {
		.regions	= memblock_memory_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
		.name		= "memory",
	},

	.reserved = {
		.regions	= memblock_reserved_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
		.name		= "reserved",
	},

#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
	.physmem = {
		.regions	= memblock_physmem_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_PHYSMEM_REGIONS,
		.name		= "physmem",
	},
#endif

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};
Yinghai Lu's avatar
Yinghai Lu committed
57

58
int memblock_debug __initdata_memblock;
59
static bool system_has_some_mirror __initdata_memblock = false;
60
static int memblock_can_resize __initdata_memblock;
61
62
static int memblock_memory_in_slab __initdata_memblock = 0;
static int memblock_reserved_in_slab __initdata_memblock = 0;
Yinghai Lu's avatar
Yinghai Lu committed
63

64
65
66
67
68
/*
 * Pick the default allocation flags: request mirrored memory only when some
 * range has been marked as mirrored, otherwise use no special flags.
 */
ulong __init_memblock choose_memblock_flags(void)
{
	if (system_has_some_mirror)
		return MEMBLOCK_MIRROR;

	return MEMBLOCK_NONE;
}

69
70
71
/*
 * Clamp *@size so that @base + *@size does not go past PHYS_ADDR_MAX.
 * The clamped value is written back through @size and also returned.
 */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	phys_addr_t room = PHYS_ADDR_MAX - base;

	if (room < *size)
		*size = room;

	return *size;
}

75
76
77
/*
 * Address comparison utilities
 */
78
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
79
				       phys_addr_t base2, phys_addr_t size2)
Yinghai Lu's avatar
Yinghai Lu committed
80
81
82
83
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

84
/*
 * Report whether [@base, @base + @size) overlaps any region currently
 * recorded in @type.
 */
bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long idx;

	for (idx = 0; idx < type->cnt; idx++) {
		struct memblock_region *rgn = &type->regions[idx];

		if (memblock_addrs_overlap(base, size, rgn->base, rgn->size))
			return true;
	}

	return false;
}

96
97
98
99
100
101
/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 * @flags: pick from blocks based on memory attributes
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 * Each free range is clamped to [@start, @end] and the first one that can
 * hold an @align aligned block of @size bytes wins.
 *
 * Return:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid,
				ulong flags)
{
	phys_addr_t lo, hi, cand;
	u64 i;

	for_each_free_mem_range(i, nid, flags, &lo, &hi, NULL) {
		/* restrict the free range to the caller's window */
		lo = clamp(lo, start, end);
		hi = clamp(hi, start, end);

		cand = round_up(lo, align);
		if (cand >= hi)
			continue;

		if (hi - cand >= size)
			return cand;
	}

	return 0;
}

130
/**
131
 * __memblock_find_range_top_down - find free area utility, in top-down
132
133
134
135
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
136
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
137
 * @flags: pick from blocks based on memory attributes
138
 *
139
 * Utility called from memblock_find_in_range_node(), find free area top-down.
140
141
 *
 * RETURNS:
142
 * Found address on success, 0 on failure.
143
 */
144
145
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
146
147
			       phys_addr_t size, phys_addr_t align, int nid,
			       ulong flags)
148
149
150
151
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

152
153
	for_each_free_mem_range_reverse(i, nid, flags, &this_start, &this_end,
					NULL) {
154
155
156
157
158
159
160
161
162
163
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}
164

165
166
	return 0;
}
167

168
169
170
171
/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
172
173
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
174
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
175
 * @flags: pick from blocks based on memory attributes
176
177
178
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
179
180
181
182
183
184
185
186
 * When allocation direction is bottom-up, the @start should be greater
 * than the end of the kernel image. Otherwise, it will be trimmed. The
 * reason is that we want the bottom-up allocation just near the kernel
 * image so it is highly likely that the allocated memory and the kernel
 * will reside in the same node.
 *
 * If bottom-up allocation failed, will try to allocate memory top-down.
 *
187
 * RETURNS:
188
 * Found address on success, 0 on failure.
189
 */
190
191
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
192
					phys_addr_t end, int nid, ulong flags)
193
{
194
	phys_addr_t kernel_end, ret;
195

196
197
198
199
200
201
202
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);
203
204
205
206
207
208
209
210
211
212
213
214
215
216
	kernel_end = __pa_symbol(_end);

	/*
	 * try bottom-up allocation only when bottom-up mode
	 * is set and @end is above the kernel image.
	 */
	if (memblock_bottom_up() && end > kernel_end) {
		phys_addr_t bottom_up_start;

		/* make sure we will allocate above the kernel */
		bottom_up_start = max(start, kernel_end);

		/* ok, try bottom-up allocation first */
		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
217
						      size, align, nid, flags);
218
219
220
221
222
223
224
225
226
227
228
229
230
		if (ret)
			return ret;

		/*
		 * we always limit bottom-up allocation above the kernel,
		 * but top-down allocation doesn't have the limit, so
		 * retrying top-down allocation may succeed when bottom-up
		 * allocation failed.
		 *
		 * bottom-up allocation is expected to be fail very rarely,
		 * so we use WARN_ONCE() here to see the stack trace if
		 * fail happens.
		 */
Joe Perches's avatar
Joe Perches committed
231
		WARN_ONCE(1, "memblock: bottom-up allocation failed, memory hotunplug may be affected\n");
232
	}
233

234
235
	return __memblock_find_range_top_down(start, end, size, align, nid,
					      flags);
236
237
}

238
239
240
241
242
243
244
245
246
247
/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * RETURNS:
248
 * Found address on success, 0 on failure.
249
 */
250
251
252
phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
253
{
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
	phys_addr_t ret;
	ulong flags = choose_memblock_flags();

again:
	ret = memblock_find_in_range_node(size, align, start, end,
					    NUMA_NO_NODE, flags);

	if (!ret && (flags & MEMBLOCK_MIRROR)) {
		pr_warn("Could not allocate %pap bytes of mirrored memory\n",
			&size);
		flags &= ~MEMBLOCK_MIRROR;
		goto again;
	}

	return ret;
269
270
}

271
/*
 * Drop region @r from @type, closing the gap in the array.  If that leaves
 * the array empty, it is reset to a single zero-sized dummy entry.
 */
static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	struct memblock_region *victim = &type->regions[r];
	struct memblock_region *first;

	type->total_size -= victim->size;
	memmove(victim, victim + 1,
		(type->cnt - (r + 1)) * sizeof(*victim));
	type->cnt--;

	if (type->cnt)
		return;

	/* Special case for empty arrays */
	WARN_ON(type->total_size != 0);
	first = &type->regions[0];
	type->cnt = 1;
	first->base = 0;
	first->size = 0;
	first->flags = 0;
	memblock_set_region_node(first, MAX_NUMNODES);
}

289
#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK
/*
 * Hand the regions array of @type to __memblock_free_late(), unless @type
 * is still using its static initial array @init_regions.
 */
static void __init memblock_discard_array(struct memblock_type *type,
					  struct memblock_region *init_regions)
{
	phys_addr_t addr, size;

	if (type->regions == init_regions)
		return;

	addr = __pa(type->regions);
	size = PAGE_ALIGN(sizeof(struct memblock_region) * type->max);
	__memblock_free_late(addr, size);
}

/**
 * memblock_discard - discard memory and reserved arrays if they were allocated
 *
 * The reserved array is handled first, then the memory array.
 */
void __init memblock_discard(void)
{
	memblock_discard_array(&memblock.reserved,
			       memblock_reserved_init_regions);
	memblock_discard_array(&memblock.memory,
			       memblock_memory_init_regions);
}
#endif

313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * Return:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *new_array, *old_array;
	phys_addr_t old_alloc_size, new_alloc_size;
	phys_addr_t old_size, new_size, addr;
	int use_slab = slab_is_available();
	int *in_slab;

	/*
	 * We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	/* Calculate new doubled size */
	old_size = type->max * sizeof(struct memblock_region);
	new_size = old_size << 1;
	/*
	 * The array is allocated in PAGE_SIZE aligned units so that it can
	 * be freed completely later.
	 */
	old_alloc_size = PAGE_ALIGN(old_size);
	new_alloc_size = PAGE_ALIGN(new_size);

	/* Retrieve the slab flag */
	if (type == &memblock.memory)
		in_slab = &memblock_memory_in_slab;
	else
		in_slab = &memblock_reserved_in_slab;

	/*
	 * Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() and we use it or
	 * we use MEMBLOCK for allocations. That means that this is unsafe to
	 * use when bootmem is currently active (unless bootmem itself is
	 * implemented on top of MEMBLOCK which isn't the case yet)
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab
	 * is active for memory hotplug operations
	 */
	if (use_slab) {
		new_array = kmalloc(new_size, GFP_KERNEL);
		addr = new_array ? __pa(new_array) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved)
			new_area_start = new_area_size = 0;

		/* first look above the range to avoid ... */
		addr = memblock_find_in_range(new_area_start + new_area_size,
						memblock.current_limit,
						new_alloc_size, PAGE_SIZE);
		/* ... and fall back to the space below it */
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				new_alloc_size, PAGE_SIZE);

		new_array = addr ? __va(addr) : NULL;
	}
	if (!addr) {
		/* type->max is unsigned, print it as such */
		pr_err("memblock: Failed to double %s array from %lu to %lu entries !\n",
		       type->name, type->max, type->max * 2);
		return -1;
	}

	memblock_dbg("memblock: %s is doubled to %lu at [%#010llx-%#010llx]\n",
			type->name, type->max * 2, (u64)addr,
			(u64)addr + new_size - 1);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(new_array, type->regions, old_size);
	memset(new_array + type->max, 0, old_size);
	old_array = type->regions;
	type->regions = new_array;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(old_array);
	else if (old_array != memblock_memory_init_regions &&
		 old_array != memblock_reserved_init_regions)
		memblock_free(__pa(old_array), old_alloc_size);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve(addr, new_alloc_size));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

430
431
432
433
434
435
436
/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 *
 * Scan @type and merge neighboring compatible regions.  Two neighbours are
 * compatible when they are exactly adjacent and share node id and flags.
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type)
{
	int i = 0;

	/* cnt never goes below 1 */
	while (i < type->cnt - 1) {
		struct memblock_region *cur = &type->regions[i];
		struct memblock_region *nxt = &type->regions[i + 1];

		if (cur->base + cur->size == nxt->base &&
		    memblock_get_region_node(cur) ==
		    memblock_get_region_node(nxt) &&
		    cur->flags == nxt->flags) {
			/* absorb @nxt into @cur and retry the same index */
			cur->size += nxt->size;
			/* move forward from nxt + 1, index of which is i + 2 */
			memmove(nxt, nxt + 1,
				(type->cnt - (i + 2)) * sizeof(*nxt));
			type->cnt--;
		} else {
			/* the array is sorted, neighbours must not overlap */
			BUG_ON(cur->base + cur->size > nxt->base);
			i++;
		}
	}
}
Yinghai Lu's avatar
Yinghai Lu committed
460

461
462
/**
 * memblock_insert_region - insert new memblock region
 * @type:	memblock type to insert into
 * @idx:	index for the insertion point
 * @base:	base address of the new region
 * @size:	size of the new region
 * @nid:	node id of the new region
 * @flags:	flags of the new region
 *
 * Insert new memblock region [@base,@base+@size) into @type at @idx.
 * @type must already have extra room to accommodate the new region;
 * a full array triggers BUG_ON().
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid, unsigned long flags)
{
	struct memblock_region *slot = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);

	/* shift the tail up by one entry to open the slot at @idx */
	memmove(slot + 1, slot, (type->cnt - idx) * sizeof(*slot));

	slot->base = base;
	slot->size = size;
	slot->flags = flags;
	memblock_set_region_node(slot, nid);

	type->cnt++;
	type->total_size += size;
}

/**
491
 * memblock_add_range - add new memblock region
492
493
494
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
495
 * @nid: nid of the new region
496
 * @flags: flags of the new region
497
498
499
500
501
502
503
504
505
 *
 * Add new memblock region [@base,@base+@size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
506
int __init_memblock memblock_add_range(struct memblock_type *type,
507
508
				phys_addr_t base, phys_addr_t size,
				int nid, unsigned long flags)
509
510
{
	bool insert = false;
511
512
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
513
514
	int idx, nr_new;
	struct memblock_region *rgn;
515

516
517
518
	if (!size)
		return 0;

519
520
	/* special case for empty array */
	if (type->regions[0].size == 0) {
521
		WARN_ON(type->cnt != 1 || type->total_size);
522
523
		type->regions[0].base = base;
		type->regions[0].size = size;
524
		type->regions[0].flags = flags;
525
		memblock_set_region_node(&type->regions[0], nid);
526
		type->total_size = size;
527
		return 0;
Yinghai Lu's avatar
Yinghai Lu committed
528
	}
529
530
531
532
repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
533
	 * to accommodate the new area.  The second actually inserts them.
534
	 */
535
536
	base = obase;
	nr_new = 0;
Yinghai Lu's avatar
Yinghai Lu committed
537

538
	for_each_memblock_type(idx, type, rgn) {
539
540
541
542
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
Yinghai Lu's avatar
Yinghai Lu committed
543
			break;
544
545
546
547
548
549
550
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
551
552
553
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
			WARN_ON(nid != memblock_get_region_node(rgn));
#endif
554
			WARN_ON(flags != rgn->flags);
555
556
			nr_new++;
			if (insert)
557
				memblock_insert_region(type, idx++, base,
558
559
						       rbase - base, nid,
						       flags);
Yinghai Lu's avatar
Yinghai Lu committed
560
		}
561
562
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
Yinghai Lu's avatar
Yinghai Lu committed
563
	}
564
565
566
567
568

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
569
			memblock_insert_region(type, idx, base, end - base,
570
					       nid, flags);
Yinghai Lu's avatar
Yinghai Lu committed
571
572
	}

573
574
575
	if (!nr_new)
		return 0;

576
577
578
	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
579
	 */
580
581
	if (!insert) {
		while (type->cnt + nr_new > type->max)
582
			if (memblock_double_array(type, obase, size) < 0)
583
584
585
586
587
588
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
589
	}
Yinghai Lu's avatar
Yinghai Lu committed
590
591
}

592
593
594
/*
 * Add [@base, @base + @size) to memblock.memory, tagged with node @nid and
 * no flags.  Returns whatever memblock_add_range() returns.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				       int nid)
{
	struct memblock_type *mem = &memblock.memory;

	return memblock_add_range(mem, base, size, nid, 0);
}

598
/*
 * Add [@base, @base + @size) to memblock.memory with node MAX_NUMNODES and
 * no flags.  Returns whatever memblock_add_range() returns.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	/* inclusive end address, used for the debug message only */
	phys_addr_t end = base + size - 1;

	/*
	 * _RET_IP_ is a return address inside the caller, not a function
	 * pointer, so it is printed with %pS.
	 */
	memblock_dbg("memblock_add: [%pa-%pa] %pS\n",
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0);
}

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base,@base+@size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
 *
 * Return:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int idx;
	struct memblock_region *rgn;

	*start_rgn = *end_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for_each_memblock_type(idx, type, rgn) {
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;
		phys_addr_t cut;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			cut = base - rbase;
			rgn->base = base;
			rgn->size -= cut;
			type->total_size -= cut;
			memblock_insert_region(type, idx, rbase, cut,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			cut = end - rbase;
			rgn->base = end;
			rgn->size -= cut;
			type->total_size -= cut;
			memblock_insert_region(type, idx, rbase, cut,
					       memblock_get_region_node(rgn),
					       rgn->flags);
			/* revisit this index on the next iteration */
			idx--;
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = idx;
			*end_rgn = idx + 1;
		}
	}

	return 0;
}

684
/*
 * Remove [@base, @base + @size) from @type: isolate the range first, then
 * drop every region inside it, highest index first.
 * Returns 0 on success or the error from memblock_isolate_range().
 */
static int __init_memblock memblock_remove_range(struct memblock_type *type,
					  phys_addr_t base, phys_addr_t size)
{
	int first, past_last;
	int idx, err;

	err = memblock_isolate_range(type, base, size, &first, &past_last);
	if (err)
		return err;

	idx = past_last;
	while (idx > first)
		memblock_remove_region(type, --idx);

	return 0;
}

699
/*
 * Remove [@base, @base + @size) from memblock.memory.
 * Returns whatever memblock_remove_range() returns.
 */
int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;
	/* inclusive end address, used for the debug message only */
	phys_addr_t last = base + size - 1;

	memblock_dbg("memblock_remove: [%pa-%pa] %pS\n",
		     &base, &last, (void *)_RET_IP_);

	return memblock_remove_range(mem, base, size);
}

709

710
/*
 * Release [@base, @base + @size) from memblock.reserved, after telling
 * kmemleak about the range being freed.
 * Returns whatever memblock_remove_range() returns.
 */
int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
	/* inclusive end address, used for the debug message only */
	phys_addr_t end = base + size - 1;

	/*
	 * _RET_IP_ is a return address inside the caller, not a function
	 * pointer, so it is printed with %pS.
	 */
	memblock_dbg("   memblock_free: [%pa-%pa] %pS\n",
		     &base, &end, (void *)_RET_IP_);

	kmemleak_free_part_phys(base, size);
	return memblock_remove_range(&memblock.reserved, base, size);
}

721
/*
 * Add [@base, @base + @size) to memblock.reserved with node MAX_NUMNODES
 * and no flags.  Returns whatever memblock_add_range() returns.
 */
int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	/* inclusive end address, used for the debug message only */
	phys_addr_t end = base + size - 1;

	/*
	 * _RET_IP_ is a return address inside the caller, not a function
	 * pointer, so it is printed with %pS.
	 */
	memblock_dbg("memblock_reserve: [%pa-%pa] %pS\n",
		     &base, &end, (void *)_RET_IP_);

	return memblock_add_range(&memblock.reserved, base, size, MAX_NUMNODES, 0);
}

731
732
/**
 *
733
 * This function isolates region [@base, @base + @size), and sets/clears flag
734
 *
735
 * Return 0 on success, -errno on failure.
736
 */
737
738
static int __init_memblock memblock_setclr_flag(phys_addr_t base,
				phys_addr_t size, int set, int flag)
739
740
741
742
743
744
745
746
747
{
	struct memblock_type *type = &memblock.memory;
	int i, ret, start_rgn, end_rgn;

	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
748
749
750
751
		if (set)
			memblock_set_region_flags(&type->regions[i], flag);
		else
			memblock_clear_region_flags(&type->regions[i], flag);
752
753
754
755
756
757

	memblock_merge_regions(type);
	return 0;
}

/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	const int set = 1;

	return memblock_setclr_flag(base, size, set, MEMBLOCK_HOTPLUG);
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	const int set = 0;

	return memblock_setclr_flag(base, size, set, MEMBLOCK_HOTPLUG);
}

781
782
783
784
785
/**
 * memblock_mark_mirror - Mark mirrored memory with flag MEMBLOCK_MIRROR.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Also records that the system has mirrored memory, which makes
 * choose_memblock_flags() return %MEMBLOCK_MIRROR from then on.
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_mirror(phys_addr_t base, phys_addr_t size)
{
	const int set = 1;

	system_has_some_mirror = true;

	return memblock_setclr_flag(base, size, set, MEMBLOCK_MIRROR);
}

795
796
797
798
799
800
801
802
803
804
805
/**
 * memblock_mark_nomap - Mark a memory region with flag MEMBLOCK_NOMAP.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
{
	const int set = 1;

	return memblock_setclr_flag(base, size, set, MEMBLOCK_NOMAP);
}
806

807
808
809
810
811
812
813
814
815
816
817
818
/**
 * memblock_clear_nomap - Clear flag MEMBLOCK_NOMAP for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * Return: 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size)
{
	const int set = 0;

	return memblock_setclr_flag(base, size, set, MEMBLOCK_NOMAP);
}

819
820
821
822
823
824
825
826
827
828
829
830
/**
 * __next_reserved_mem_region - next function for for_each_reserved_region()
 * @idx: pointer to u64 loop variable
 * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
 *
 * Iterate over all reserved memory regions.  The end address stored in
 * *@out_end is inclusive (base + size - 1).  Once *@idx runs past the last
 * region it is set to %ULLONG_MAX to signal the end of the iteration.
 */
void __init_memblock __next_reserved_mem_region(u64 *idx,
					   phys_addr_t *out_start,
					   phys_addr_t *out_end)
{
	struct memblock_type *type = &memblock.reserved;
	struct memblock_region *rgn;

	if (*idx >= type->cnt) {
		/* signal end of iteration */
		*idx = ULLONG_MAX;
		return;
	}

	rgn = &type->regions[*idx];

	if (out_start)
		*out_start = rgn->base;
	if (out_end)
		*out_end = rgn->base + rgn->size - 1;

	*idx += 1;
}

851
/**
852
 * __next__mem_range - next function for for_each_free_mem_range() etc.
853
 * @idx: pointer to u64 loop variable
854
 * @nid: node selector, %NUMA_NO_NODE for all nodes
855
 * @flags: pick from blocks based on memory attributes
856
857
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
Wanpeng Li's avatar
Wanpeng Li committed
858
859
860
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
861
 *
862
 * Find the first area from *@idx which matches @nid, fill the out
863
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
864
865
 * *@idx contains index into type_a and the upper 32bit indexes the
 * areas before each region in type_b.	For example, if type_b regions
866
867
868
869
870
871
872
873
874
875
876
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.
 */
877
void __init_memblock __next_mem_range(u64 *idx, int nid, ulong flags,
878
879
880
881
				      struct memblock_type *type_a,
				      struct memblock_type *type_b,
				      phys_addr_t *out_start,
				      phys_addr_t *out_end, int *out_nid)
882
{
883
884
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;
885

886
887
	if (WARN_ONCE(nid == MAX_NUMNODES,
	"Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
888
		nid = NUMA_NO_NODE;
889

890
891
892
	for (; idx_a < type_a->cnt; idx_a++) {
		struct memblock_region *m = &type_a->regions[idx_a];

893
894
		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
895
		int	    m_nid = memblock_get_region_node(m);
896
897

		/* only memory regions are associated with nodes, check it */
898
		if (nid != NUMA_NO_NODE && nid != m_nid)
899
900
			continue;

901
902
903
904
		/* skip hotpluggable memory regions if needed */
		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
			continue;

905
906
907
908
		/* if we want mirror memory skip non-mirror memory regions */
		if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
			continue;

909
910
911
912
		/* skip nomap memory unless we were asked for it explicitly */
		if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
			continue;

913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
			idx_a++;
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b < type_b->cnt + 1; idx_b++) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
934
				r->base : PHYS_ADDR_MAX;
935

936
937
938
939
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
940
941
942
943
944
			if (r_start >= m_end)
				break;
			/* if the two regions intersect, we're done */
			if (m_start < r_end) {
				if (out_start)
945
946
					*out_start =
						max(m_start, r_start);
947
948
949
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
950
					*out_nid = m_nid;
951
				/*
952
953
				 * The region which ends first is
				 * advanced for the next iteration.
954
955
				 */
				if (m_end <= r_end)
956
					idx_a++;
957
				else
958
959
					idx_b++;
				*idx = (u32)idx_a | (u64)idx_b << 32;
960
961
962
963
964
965
966
967
968
				return;
			}
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

969
/**
970
971
972
973
974
 * __next_mem_range_rev - generic next function for for_each_*_range_rev()
 *
 * Finds the next range from type_a which is not marked as unsuitable
 * in type_b.
 *
975
 * @idx: pointer to u64 loop variable
976
 * @nid: node selector, %NUMA_NO_NODE for all nodes
977
 * @flags: pick from blocks based on memory attributes
978
979
 * @type_a: pointer to memblock_type from where the range is taken
 * @type_b: pointer to memblock_type which excludes memory from being taken
Wanpeng Li's avatar
Wanpeng Li committed
980
981
982
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
983
 *
984
 * Reverse of __next_mem_range().
985
 */
986
void __init_memblock __next_mem_range_rev(u64 *idx, int nid, ulong flags,
987
988
989
990
					  struct memblock_type *type_a,
					  struct memblock_type *type_b,
					  phys_addr_t *out_start,
					  phys_addr_t *out_end, int *out_nid)
991
{
992
993
	int idx_a = *idx & 0xffffffff;
	int idx_b = *idx >> 32;
994

995
996
	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
		nid = NUMA_NO_NODE;
997
998

	if (*idx == (u64)ULLONG_MAX) {
999
		idx_a = type_a->cnt - 1;
1000
1001
1002
1003
		if (type_b != NULL)
			idx_b = type_b->cnt;
		else
			idx_b = 0;
1004
1005
	}

1006
1007
1008
	for (; idx_a >= 0; idx_a--) {
		struct memblock_region *m = &type_a->regions[idx_a];

1009
1010
		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;
1011
		int m_nid = memblock_get_region_node(m);
1012
1013

		/* only memory regions are associated with nodes, check it */
1014
		if (nid != NUMA_NO_NODE && nid != m_nid)
1015
1016
			continue;

1017
1018
1019
1020
		/* skip hotpluggable memory regions if needed */
		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
			continue;

1021
1022
1023
1024
		/* if we want mirror memory skip non-mirror memory regions */
		if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
			continue;

1025
1026
1027
1028
		/* skip nomap memory unless we were asked for it explicitly */
		if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
			continue;

1029
1030
1031
1032
1033
1034
1035
		if (!type_b) {
			if (out_start)
				*out_start = m_start;
			if (out_end)
				*out_end = m_end;
			if (out_nid)
				*out_nid = m_nid;
1036
			idx_a--;
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
			*idx = (u32)idx_a | (u64)idx_b << 32;
			return;
		}

		/* scan areas before each reservation */
		for (; idx_b >= 0; idx_b--) {
			struct memblock_region *r;
			phys_addr_t r_start;
			phys_addr_t r_end;

			r = &type_b->regions[idx_b];
			r_start = idx_b ? r[-1].base + r[-1].size : 0;
			r_end = idx_b < type_b->cnt ?
1050
				r->base : PHYS_ADDR_MAX;
1051
1052
1053
1054
			/*
			 * if idx_b advanced past idx_a,
			 * break out to advance idx_a
			 */
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064

			if (r_end <= m_start)
				break;
			/* if the two regions intersect, we're done */
			if (m_end > r_start) {
				if (out_start)
					*out_start = max(m_start, r_start);
				if (out_end)
					*out_end = min(m_end, r_end);
				if (out_nid)
1065
					*out_nid = m_nid;
1066
				if (m_start >= r_start)
1067
					idx_a--;
1068
				else