memblock.c 43.3 KB
Newer Older
Yinghai Lu's avatar
Yinghai Lu committed
1
2
3
4
5
6
7
8
9
10
11
12
13
/*
 * Procedures for maintaining information about logical memory blocks.
 *
 * Peter Bergner, IBM Corp.	June 2001.
 * Copyright (C) 2001 Peter Bergner.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
14
#include <linux/slab.h>
Yinghai Lu's avatar
Yinghai Lu committed
15
16
#include <linux/init.h>
#include <linux/bitops.h>
17
#include <linux/poison.h>
18
#include <linux/pfn.h>
19
20
#include <linux/debugfs.h>
#include <linux/seq_file.h>
Yinghai Lu's avatar
Yinghai Lu committed
21
22
#include <linux/memblock.h>

23
#include <asm-generic/sections.h>
24
25
26
#include <linux/io.h>

#include "internal.h"
27

Tejun Heo's avatar
Tejun Heo committed
28
29
30
31
32
33
34
35
36
37
38
39
/*
 * Statically allocated region arrays.  The global memblock descriptor
 * below starts out pointing at them.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;

/*
 * The global memblock state.  Both the memory and the reserved type start
 * with a single empty dummy entry so that cnt is never zero.
 */
struct memblock memblock __initdata_memblock = {
	.memory = {
		.regions	= memblock_memory_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
	},

	.reserved = {
		.regions	= memblock_reserved_init_regions,
		.cnt		= 1,	/* empty dummy entry */
		.max		= INIT_MEMBLOCK_REGIONS,
	},

	.bottom_up		= false,
	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
};
Yinghai Lu's avatar
Yinghai Lu committed
43

44
/* NOTE(review): presumably the switch consulted by memblock_dbg() - confirm in the header */
int memblock_debug __initdata_memblock;
#ifdef CONFIG_MOVABLE_NODE
bool movable_node_enabled __initdata_memblock = false;
#endif
/* memblock_double_array() refuses to resize while this is zero */
static int memblock_can_resize __initdata_memblock;
/*
 * Per-type flags recording whether the current regions array was obtained
 * from the slab allocator; maintained by memblock_double_array().
 */
static int memblock_memory_in_slab __initdata_memblock = 0;
static int memblock_reserved_in_slab __initdata_memblock = 0;
Yinghai Lu's avatar
Yinghai Lu committed
51

52
/* inline so we don't get a warning when pr_debug is compiled out */
53
54
static __init_memblock const char *
memblock_type_name(struct memblock_type *type)
55
56
57
58
59
60
61
62
63
{
	if (type == &memblock.memory)
		return "memory";
	else if (type == &memblock.reserved)
		return "reserved";
	else
		return "unknown";
}

64
65
66
67
68
69
/*
 * Clamp *@size so that (@base + *@size) cannot exceed ULLONG_MAX (cast to
 * phys_addr_t).  The clamped value is stored back through @size and also
 * returned.
 */
static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
{
	phys_addr_t room = (phys_addr_t)ULLONG_MAX - base;

	*size = min(*size, room);
	return *size;
}

70
71
72
/*
 * Address comparison utilities
 */
73
static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
74
				       phys_addr_t base2, phys_addr_t size2)
Yinghai Lu's avatar
Yinghai Lu committed
75
76
77
78
{
	return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}

79
80
/*
 * Find the first region of @type that overlaps [@base, @base + @size).
 *
 * Returns the index of that region, or -1 when no region overlaps.
 */
static long __init_memblock memblock_overlaps_region(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size)
{
	unsigned long idx;

	for (idx = 0; idx < type->cnt; idx++) {
		struct memblock_region *rgn = &type->regions[idx];

		if (memblock_addrs_overlap(base, size, rgn->base, rgn->size))
			return idx;
	}

	return -1;
}

94
95
96
97
98
99
/**
 * __memblock_find_range_bottom_up - find free area utility in bottom-up
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Utility called from memblock_find_in_range_node(), find free area bottom-up.
 * Each free range is clamped to [@start, @end] and the first one that can
 * hold an @align-aligned area of @size bytes wins.
 *
 * RETURNS:
 * Found address on success, 0 on failure.
 */
static phys_addr_t __init_memblock
__memblock_find_range_bottom_up(phys_addr_t start, phys_addr_t end,
				phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t free_start, free_end, candidate;
	u64 iter;

	for_each_free_mem_range(iter, nid, &free_start, &free_end, NULL) {
		free_start = clamp(free_start, start, end);
		free_end = clamp(free_end, start, end);

		/* lowest aligned address inside the clamped free range */
		candidate = round_up(free_start, align);
		if (candidate >= free_end)
			continue;

		if (free_end - candidate >= size)
			return candidate;
	}

	return 0;
}

126
/**
127
 * __memblock_find_range_top_down - find free area utility, in top-down
128
129
130
131
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
132
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
133
 *
134
 * Utility called from memblock_find_in_range_node(), find free area top-down.
135
136
 *
 * RETURNS:
137
 * Found address on success, 0 on failure.
138
 */
139
140
141
static phys_addr_t __init_memblock
__memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
			       phys_addr_t size, phys_addr_t align, int nid)
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
{
	phys_addr_t this_start, this_end, cand;
	u64 i;

	for_each_free_mem_range_reverse(i, nid, &this_start, &this_end, NULL) {
		this_start = clamp(this_start, start, end);
		this_end = clamp(this_end, start, end);

		if (this_end < size)
			continue;

		cand = round_down(this_end - size, align);
		if (cand >= this_start)
			return cand;
	}
157

158
159
	return 0;
}
160

161
162
163
164
/**
 * memblock_find_in_range_node - find free area in given range and node
 * @size: size of free area to find
 * @align: alignment of free area to find
165
166
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
167
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
168
169
170
 *
 * Find @size free area aligned to @align in the specified range and node.
 *
171
172
173
174
175
176
177
178
 * When allocation direction is bottom-up, the @start should be greater
 * than the end of the kernel image. Otherwise, it will be trimmed. The
 * reason is that we want the bottom-up allocation just near the kernel
 * image so it is highly likely that the allocated memory and the kernel
 * will reside in the same node.
 *
 * If bottom-up allocation failed, will try to allocate memory top-down.
 *
179
 * RETURNS:
180
 * Found address on success, 0 on failure.
181
 */
182
183
184
phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
					phys_addr_t align, phys_addr_t start,
					phys_addr_t end, int nid)
185
{
186
187
188
	int ret;
	phys_addr_t kernel_end;

189
190
191
192
193
194
195
	/* pump up @end */
	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
		end = memblock.current_limit;

	/* avoid allocating the first page */
	start = max_t(phys_addr_t, start, PAGE_SIZE);
	end = max(start, end);
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
	kernel_end = __pa_symbol(_end);

	/*
	 * try bottom-up allocation only when bottom-up mode
	 * is set and @end is above the kernel image.
	 */
	if (memblock_bottom_up() && end > kernel_end) {
		phys_addr_t bottom_up_start;

		/* make sure we will allocate above the kernel */
		bottom_up_start = max(start, kernel_end);

		/* ok, try bottom-up allocation first */
		ret = __memblock_find_range_bottom_up(bottom_up_start, end,
						      size, align, nid);
		if (ret)
			return ret;

		/*
		 * we always limit bottom-up allocation above the kernel,
		 * but top-down allocation doesn't have the limit, so
		 * retrying top-down allocation may succeed when bottom-up
		 * allocation failed.
		 *
		 * bottom-up allocation is expected to be fail very rarely,
		 * so we use WARN_ONCE() here to see the stack trace if
		 * fail happens.
		 */
		WARN_ONCE(1, "memblock: bottom-up allocation failed, "
			     "memory hotunplug may be affected\n");
	}
227
228
229
230

	return __memblock_find_range_top_down(start, end, size, align, nid);
}

231
232
233
234
235
236
237
238
239
240
/**
 * memblock_find_in_range - find free area in given range
 * @start: start of candidate range
 * @end: end of candidate range, can be %MEMBLOCK_ALLOC_{ANYWHERE|ACCESSIBLE}
 * @size: size of free area to find
 * @align: alignment of free area to find
 *
 * Find @size free area aligned to @align in the specified range.
 *
 * RETURNS:
241
 * Found address on success, 0 on failure.
242
 */
243
244
245
phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
					phys_addr_t end, phys_addr_t size,
					phys_addr_t align)
246
{
247
	return memblock_find_in_range_node(size, align, start, end,
248
					    NUMA_NO_NODE);
249
250
}

251
/*
 * Drop region @r from @type: subtract its size from the running total and
 * close the gap by shifting the following entries down.  If that empties
 * the array, the single zero-sized dummy entry is re-created.
 */
static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
{
	struct memblock_region *victim = &type->regions[r];
	unsigned long trailing = type->cnt - (r + 1);

	type->total_size -= victim->size;
	memmove(victim, victim + 1, trailing * sizeof(*victim));
	type->cnt--;

	if (type->cnt != 0)
		return;

	/* Special case for empty arrays */
	WARN_ON(type->total_size != 0);
	type->cnt = 1;
	type->regions[0].base = 0;
	type->regions[0].size = 0;
	type->regions[0].flags = 0;
	memblock_set_region_node(&type->regions[0], MAX_NUMNODES);
}

269
270
271
272
273
274
/*
 * Report the reserved regions array when it is no longer the static one.
 *
 * On success the physical address of the array is stored in *@addr and the
 * page-aligned size of the array is returned.  0 is returned, and *@addr
 * left untouched, when the static array is still in use or when the array
 * must be kept (see below).
 */
phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
					phys_addr_t *addr)
{
	phys_addr_t array_bytes;

	/* still using the static array */
	if (memblock.reserved.regions == memblock_reserved_init_regions)
		return 0;

	/*
	 * Don't allow nobootmem allocator to free reserved memory regions
	 * array if
	 *  - CONFIG_DEBUG_FS is enabled;
	 *  - CONFIG_ARCH_DISCARD_MEMBLOCK is not enabled;
	 *  - reserved memory regions array have been resized during boot.
	 * Otherwise debug_fs entry "sys/kernel/debug/memblock/reserved"
	 * will show garbage instead of state of memory reservations.
	 */
	if (IS_ENABLED(CONFIG_DEBUG_FS) &&
	    !IS_ENABLED(CONFIG_ARCH_DISCARD_MEMBLOCK))
		return 0;

	*addr = __pa(memblock.reserved.regions);

	array_bytes = PAGE_ALIGN(sizeof(struct memblock_region) *
				 memblock.reserved.max);
	return array_bytes;
}

294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
/**
 * memblock_double_array - double the size of the memblock regions array
 * @type: memblock type of the regions array being doubled
 * @new_area_start: starting address of memory range to avoid overlap with
 * @new_area_size: size of memory range to avoid overlap with
 *
 * Double the size of the @type regions array. If memblock is being used to
 * allocate memory for a new reserved regions array and there is a previously
 * allocated memory range [@new_area_start,@new_area_start+@new_area_size]
 * waiting to be reserved, ensure the memory used by the new array does
 * not overlap.
 *
 * RETURNS:
 * 0 on success, -1 on failure.
 */
static int __init_memblock memblock_double_array(struct memblock_type *type,
						phys_addr_t new_area_start,
						phys_addr_t new_area_size)
{
	struct memblock_region *grown, *prev;
	phys_addr_t prev_bytes, grown_bytes;	/* exact array sizes */
	phys_addr_t prev_alloc, grown_alloc;	/* page-aligned sizes */
	phys_addr_t addr;
	int use_slab = slab_is_available();
	int *in_slab;

	/*
	 * We don't allow resizing until we know about the reserved regions
	 * of memory that aren't suitable for allocation
	 */
	if (!memblock_can_resize)
		return -1;

	/* Calculate new doubled size */
	prev_bytes = type->max * sizeof(struct memblock_region);
	grown_bytes = prev_bytes << 1;

	/*
	 * The new array is allocated aligned to PAGE_SIZE so that it can be
	 * freed completely later.
	 */
	prev_alloc = PAGE_ALIGN(prev_bytes);
	grown_alloc = PAGE_ALIGN(grown_bytes);

	/* Retrieve the slab flag */
	in_slab = (type == &memblock.memory) ? &memblock_memory_in_slab
					     : &memblock_reserved_in_slab;

	/*
	 * Try to find some space for it.
	 *
	 * WARNING: We assume that either slab_is_available() and we use it or
	 * we use MEMBLOCK for allocations. That means that this is unsafe to
	 * use when bootmem is currently active (unless bootmem itself is
	 * implemented on top of MEMBLOCK which isn't the case yet)
	 *
	 * This should however not be an issue for now, as we currently only
	 * call into MEMBLOCK while it's still active, or much later when slab
	 * is active for memory hotplug operations
	 */
	if (use_slab) {
		grown = kmalloc(grown_bytes, GFP_KERNEL);
		addr = grown ? __pa(grown) : 0;
	} else {
		/* only exclude range when trying to double reserved.regions */
		if (type != &memblock.reserved) {
			new_area_start = 0;
			new_area_size = 0;
		}

		/* first look above the range to avoid ... */
		addr = memblock_find_in_range(new_area_start + new_area_size,
					      memblock.current_limit,
					      grown_alloc, PAGE_SIZE);
		/* ... then fall back to the space below it */
		if (!addr && new_area_size)
			addr = memblock_find_in_range(0,
				min(new_area_start, memblock.current_limit),
				grown_alloc, PAGE_SIZE);

		grown = addr ? __va(addr) : NULL;
	}

	if (!addr) {
		pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
		       memblock_type_name(type), type->max, type->max * 2);
		return -1;
	}

	memblock_dbg("memblock: %s is doubled to %ld at [%#010llx-%#010llx]",
			memblock_type_name(type), type->max * 2, (u64)addr,
			(u64)addr + grown_bytes - 1);

	/*
	 * Found space, we now need to move the array over before we add the
	 * reserved region since it may be our reserved array itself that is
	 * full.
	 */
	memcpy(grown, type->regions, prev_bytes);
	memset(grown + type->max, 0, prev_bytes);
	prev = type->regions;
	type->regions = grown;
	type->max <<= 1;

	/* Free old array. We needn't free it if the array is the static one */
	if (*in_slab)
		kfree(prev);
	else if (prev != memblock_memory_init_regions &&
		 prev != memblock_reserved_init_regions)
		memblock_free(__pa(prev), prev_alloc);

	/*
	 * Reserve the new array if that comes from the memblock.  Otherwise, we
	 * needn't do it
	 */
	if (!use_slab)
		BUG_ON(memblock_reserve(addr, grown_alloc));

	/* Update slab flag */
	*in_slab = use_slab;

	return 0;
}

411
412
413
414
415
416
417
/**
 * memblock_merge_regions - merge neighboring compatible regions
 * @type: memblock type to scan
 *
 * Scan @type and merge neighboring compatible regions.  Two neighbours are
 * compatible when the first ends exactly where the second begins and both
 * carry the same node id and the same flags.
 */
static void __init_memblock memblock_merge_regions(struct memblock_type *type)
{
	int i = 0;

	/* cnt never goes below 1 */
	while (i < type->cnt - 1) {
		struct memblock_region *cur = &type->regions[i];
		struct memblock_region *nxt = &type->regions[i + 1];
		bool mergeable = cur->base + cur->size == nxt->base &&
				 memblock_get_region_node(cur) ==
				 memblock_get_region_node(nxt) &&
				 cur->flags == nxt->flags;

		if (mergeable) {
			cur->size += nxt->size;
			/* move forward from nxt + 1, index of which is i + 2 */
			memmove(nxt, nxt + 1,
				(type->cnt - (i + 2)) * sizeof(*nxt));
			type->cnt--;
			/* stay on @cur, it may merge with its new neighbour */
			continue;
		}

		/* regions are expected to be sorted and non-overlapping */
		BUG_ON(cur->base + cur->size > nxt->base);
		i++;
	}
}
Yinghai Lu's avatar
Yinghai Lu committed
441

442
443
/**
 * memblock_insert_region - insert new memblock region
 * @type:	memblock type to insert into
 * @idx:	index for the insertion point
 * @base:	base address of the new region
 * @size:	size of the new region
 * @nid:	node id of the new region
 * @flags:	flags of the new region
 *
 * Insert new memblock region [@base,@base+@size) into @type at @idx.
 * @type must already have extra room to accommodate the new region.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid, unsigned long flags)
{
	struct memblock_region *slot = type->regions + idx;

	BUG_ON(type->cnt >= type->max);

	/* shift entries from @idx onwards up by one to open the slot */
	memmove(slot + 1, slot, (type->cnt - idx) * sizeof(*slot));

	slot->base = base;
	slot->size = size;
	slot->flags = flags;
	memblock_set_region_node(slot, nid);

	type->total_size += size;
	type->cnt++;
}

/**
 * memblock_add_region - add new memblock region
 * @type: memblock type to add new region into
 * @base: base address of the new region
 * @size: size of the new region
476
 * @nid: nid of the new region
477
 * @flags: flags of the new region
478
479
480
481
482
483
484
485
486
 *
 * Add new memblock region [@base,@base+@size) into @type.  The new region
 * is allowed to overlap with existing ones - overlaps don't affect already
 * existing regions.  @type is guaranteed to be minimal (all neighbouring
 * compatible regions are merged) after the addition.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
487
static int __init_memblock memblock_add_region(struct memblock_type *type,
488
489
				phys_addr_t base, phys_addr_t size,
				int nid, unsigned long flags)
490
491
{
	bool insert = false;
492
493
	phys_addr_t obase = base;
	phys_addr_t end = base + memblock_cap_size(base, &size);
494
495
	int i, nr_new;

496
497
498
	if (!size)
		return 0;

499
500
	/* special case for empty array */
	if (type->regions[0].size == 0) {
501
		WARN_ON(type->cnt != 1 || type->total_size);
502
503
		type->regions[0].base = base;
		type->regions[0].size = size;
504
		type->regions[0].flags = flags;
505
		memblock_set_region_node(&type->regions[0], nid);
506
		type->total_size = size;
507
		return 0;
Yinghai Lu's avatar
Yinghai Lu committed
508
	}
509
510
511
512
513
repeat:
	/*
	 * The following is executed twice.  Once with %false @insert and
	 * then with %true.  The first counts the number of regions needed
	 * to accomodate the new area.  The second actually inserts them.
514
	 */
515
516
	base = obase;
	nr_new = 0;
Yinghai Lu's avatar
Yinghai Lu committed
517

518
519
520
521
522
523
	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
Yinghai Lu's avatar
Yinghai Lu committed
524
			break;
525
526
527
528
529
530
531
532
533
534
		if (rend <= base)
			continue;
		/*
		 * @rgn overlaps.  If it separates the lower part of new
		 * area, insert that portion.
		 */
		if (rbase > base) {
			nr_new++;
			if (insert)
				memblock_insert_region(type, i++, base,
535
536
						       rbase - base, nid,
						       flags);
Yinghai Lu's avatar
Yinghai Lu committed
537
		}
538
539
		/* area below @rend is dealt with, forget about it */
		base = min(rend, end);
Yinghai Lu's avatar
Yinghai Lu committed
540
	}
541
542
543
544
545

	/* insert the remaining portion */
	if (base < end) {
		nr_new++;
		if (insert)
546
547
			memblock_insert_region(type, i, base, end - base,
					       nid, flags);
Yinghai Lu's avatar
Yinghai Lu committed
548
549
	}

550
551
552
	/*
	 * If this was the first round, resize array and repeat for actual
	 * insertions; otherwise, merge and return.
553
	 */
554
555
	if (!insert) {
		while (type->cnt + nr_new > type->max)
556
			if (memblock_double_array(type, obase, size) < 0)
557
558
559
560
561
562
				return -ENOMEM;
		insert = true;
		goto repeat;
	} else {
		memblock_merge_regions(type);
		return 0;
563
	}
Yinghai Lu's avatar
Yinghai Lu committed
564
565
}

566
567
568
/*
 * Add [@base, @base + @size) to the memory type with node id @nid and no
 * flags.  Returns whatever memblock_add_region() returns.
 */
int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
				       int nid)
{
	struct memblock_type *mem = &memblock.memory;

	return memblock_add_region(mem, base, size, nid, 0);
}

572
/*
 * Add [@base, @base + @size) to the memory type with MAX_NUMNODES as the
 * node id and no flags.  Returns whatever memblock_add_region() returns.
 */
int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;

	return memblock_add_region(mem, base, size, MAX_NUMNODES, 0);
}

578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
/**
 * memblock_isolate_range - isolate given range into disjoint memblocks
 * @type: memblock type to isolate range for
 * @base: base of range to isolate
 * @size: size of range to isolate
 * @start_rgn: out parameter for the start of isolated region
 * @end_rgn: out parameter for the end of isolated region
 *
 * Walk @type and ensure that regions don't cross the boundaries defined by
 * [@base,@base+@size).  Crossing regions are split at the boundaries,
 * which may create at most two more regions.  The index of the first
 * region inside the range is returned in *@start_rgn and end in *@end_rgn.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int __init_memblock memblock_isolate_range(struct memblock_type *type,
					phys_addr_t base, phys_addr_t size,
					int *start_rgn, int *end_rgn)
{
	phys_addr_t end = base + memblock_cap_size(base, &size);
	int i;

	*end_rgn = 0;
	*start_rgn = 0;

	if (!size)
		return 0;

	/* we'll create at most two more regions */
	while (type->cnt + 2 > type->max)
		if (memblock_double_array(type, base, size) < 0)
			return -ENOMEM;

	for (i = 0; i < type->cnt; i++) {
		struct memblock_region *rgn = &type->regions[i];
		phys_addr_t rbase = rgn->base;
		phys_addr_t rend = rbase + rgn->size;

		if (rbase >= end)
			break;
		if (rend <= base)
			continue;

		if (rbase < base) {
			phys_addr_t below = base - rbase;

			/*
			 * @rgn intersects from below.  Split and continue
			 * to process the next region - the new top half.
			 */
			rgn->base = base;
			rgn->size -= below;
			type->total_size -= below;
			memblock_insert_region(type, i, rbase, below,
					       memblock_get_region_node(rgn),
					       rgn->flags);
		} else if (rend > end) {
			phys_addr_t inside = end - rbase;

			/*
			 * @rgn intersects from above.  Split and redo the
			 * current region - the new bottom half.
			 */
			rgn->base = end;
			rgn->size -= inside;
			type->total_size -= inside;
			memblock_insert_region(type, i, rbase, inside,
					       memblock_get_region_node(rgn),
					       rgn->flags);
			/* revisit index i, which now holds the bottom half */
			i--;
		} else {
			/* @rgn is fully contained, record it */
			if (!*end_rgn)
				*start_rgn = i;
			*end_rgn = i + 1;
		}
	}

	return 0;
}

654
655
/*
 * Remove [@base, @base + @size) from @type.  The range is first isolated so
 * that it is covered by whole regions, which are then removed from the
 * highest index down.  Returns 0 on success or the error reported by
 * memblock_isolate_range().
 */
static int __init_memblock __memblock_remove(struct memblock_type *type,
					     phys_addr_t base, phys_addr_t size)
{
	int first, past_last;
	int idx, err;

	err = memblock_isolate_range(type, base, size, &first, &past_last);
	if (err)
		return err;

	idx = past_last;
	while (--idx >= first)
		memblock_remove_region(type, idx);

	return 0;
}

669
/*
 * Remove [@base, @base + @size) from the memory type.  Returns whatever
 * __memblock_remove() returns.
 */
int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;

	return __memblock_remove(mem, base, size);
}

674
/*
 * Remove [@base, @base + @size) from the reserved type, after emitting a
 * debug message with the range and the caller.  Returns whatever
 * __memblock_remove() returns.
 */
int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
{
	unsigned long long first = base;
	unsigned long long last = first + size - 1;

	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
		     first, last, (void *)_RET_IP_);

	return __memblock_remove(&memblock.reserved, base, size);
}

684
685
686
687
/*
 * Add [@base, @base + @size) to the reserved type with the given @nid and
 * @flags, after emitting a debug message with the range, the flags and the
 * caller.  Returns whatever memblock_add_region() returns.
 */
static int __init_memblock memblock_reserve_region(phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   unsigned long flags)
{
	unsigned long long first = base;
	unsigned long long last = first + size - 1;

	memblock_dbg("memblock_reserve: [%#016llx-%#016llx] flags %#02lx %pF\n",
		     first, last, flags, (void *)_RET_IP_);

	return memblock_add_region(&memblock.reserved, base, size, nid, flags);
}
Yinghai Lu's avatar
Yinghai Lu committed
698

699
700
701
/*
 * Reserve [@base, @base + @size) with MAX_NUMNODES as the node id and no
 * flags.  Returns whatever memblock_reserve_region() returns.
 */
int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
{
	int err;

	err = memblock_reserve_region(base, size, MAX_NUMNODES, 0);
	return err;
}

704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
/**
 * memblock_mark_hotplug - Mark hotpluggable memory with flag MEMBLOCK_HOTPLUG.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * This function isolates region [@base, @base + @size) in the memory type,
 * marks the isolated regions with flag MEMBLOCK_HOTPLUG and then merges
 * neighbouring compatible regions again.
 *
 * Return 0 on success, -errno on failure.
 */
int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;
	int first, past_last, idx, err;

	err = memblock_isolate_range(mem, base, size, &first, &past_last);
	if (err)
		return err;

	idx = first;
	while (idx < past_last) {
		memblock_set_region_flags(&mem->regions[idx], MEMBLOCK_HOTPLUG);
		idx++;
	}

	memblock_merge_regions(mem);
	return 0;
}

/**
 * memblock_clear_hotplug - Clear flag MEMBLOCK_HOTPLUG for a specified region.
 * @base: the base phys addr of the region
 * @size: the size of the region
 *
 * This function isolates region [@base, @base + @size) in the memory type,
 * clears flag MEMBLOCK_HOTPLUG on the isolated regions and then merges
 * neighbouring compatible regions again.
 *
 * Return 0 on success, -errno on failure.
 */
int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
{
	struct memblock_type *mem = &memblock.memory;
	int first, past_last, idx, err;

	err = memblock_isolate_range(mem, base, size, &first, &past_last);
	if (err)
		return err;

	idx = first;
	while (idx < past_last) {
		memblock_clear_region_flags(&mem->regions[idx],
					    MEMBLOCK_HOTPLUG);
		idx++;
	}

	memblock_merge_regions(mem);
	return 0;
}

757
758
759
/**
 * __next_free_mem_range - next function for for_each_free_mem_range()
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Find the first free area from *@idx which matches @nid, fill the out
 * parameters, and update *@idx for the next iteration.  The lower 32bit of
 * *@idx contains index into memory region and the upper 32bit indexes the
 * areas before each reserved region.  For example, if reserved regions
 * look like the following,
 *
 *	0:[0-16), 1:[32-48), 2:[128-130)
 *
 * The upper 32bit indexes the following regions.
 *
 *	0:[0-0), 1:[16-32), 2:[48-128), 3:[130-MAX)
 *
 * As both region arrays are sorted, the function advances the two indices
 * in lockstep and returns each intersection.  When nothing is left, *@idx
 * is set to ULLONG_MAX.
 */
void __init_memblock __next_free_mem_range(u64 *idx, int nid,
					   phys_addr_t *out_start,
					   phys_addr_t *out_end, int *out_nid)
{
	struct memblock_type *mem = &memblock.memory;
	struct memblock_type *rsv = &memblock.reserved;
	int mem_idx = *idx & 0xffffffff;
	int rsv_idx = *idx >> 32;
	bool check_node = (nid != NUMA_NO_NODE) && (nid != MAX_NUMNODES);

	if (nid == MAX_NUMNODES)
		pr_warn_once("%s: Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n",
			     __func__);

	for ( ; mem_idx < mem->cnt; mem_idx++) {
		struct memblock_region *m = &mem->regions[mem_idx];
		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;

		/* only memory regions are associated with nodes, check it */
		if (check_node && nid != memblock_get_region_node(m))
			continue;

		/* scan areas before each reservation for intersection */
		for ( ; rsv_idx < rsv->cnt + 1; rsv_idx++) {
			struct memblock_region *r = &rsv->regions[rsv_idx];
			phys_addr_t gap_start = rsv_idx ? r[-1].base + r[-1].size : 0;
			phys_addr_t gap_end = rsv_idx < rsv->cnt ? r->base : ULLONG_MAX;

			/* the gap lies past this memory region, advance it */
			if (gap_start >= m_end)
				break;
			/* the gap lies below this memory region, try the next */
			if (m_start >= gap_end)
				continue;

			/* the two regions intersect, we're done */
			if (out_start)
				*out_start = max(m_start, gap_start);
			if (out_end)
				*out_end = min(m_end, gap_end);
			if (out_nid)
				*out_nid = memblock_get_region_node(m);
			/*
			 * The region which ends first is advanced
			 * for the next iteration.
			 */
			if (m_end <= gap_end)
				mem_idx++;
			else
				rsv_idx++;
			*idx = (u32)mem_idx | (u64)rsv_idx << 32;
			return;
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

838
839
840
/**
 * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
 * @idx: pointer to u64 loop variable
 * @nid: node selector, %NUMA_NO_NODE for all nodes
 * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
 * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
 * @out_nid: ptr to int for nid of the range, can be %NULL
 *
 * Reverse of __next_free_mem_range().  A *@idx of ULLONG_MAX starts the
 * walk from the last memory region, and *@idx is set back to ULLONG_MAX
 * when nothing is left.
 *
 * Linux kernel cannot migrate pages used by itself. Memory hotplug users won't
 * be able to hot-remove hotpluggable memory used by the kernel. So this
 * function skips hotpluggable regions if needed when allocating memory for
 * the kernel.
 */
void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
					   phys_addr_t *out_start,
					   phys_addr_t *out_end, int *out_nid)
{
	struct memblock_type *mem = &memblock.memory;
	struct memblock_type *rsv = &memblock.reserved;
	int mem_idx = *idx & 0xffffffff;
	int rsv_idx = *idx >> 32;
	bool check_node = (nid != NUMA_NO_NODE) && (nid != MAX_NUMNODES);

	if (nid == MAX_NUMNODES)
		pr_warn_once("%s: Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n",
			     __func__);

	/* first call: start from the top of both arrays */
	if (*idx == (u64)ULLONG_MAX) {
		mem_idx = mem->cnt - 1;
		rsv_idx = rsv->cnt;
	}

	for ( ; mem_idx >= 0; mem_idx--) {
		struct memblock_region *m = &mem->regions[mem_idx];
		phys_addr_t m_start = m->base;
		phys_addr_t m_end = m->base + m->size;

		/* only memory regions are associated with nodes, check it */
		if (check_node && nid != memblock_get_region_node(m))
			continue;

		/* skip hotpluggable memory regions if needed */
		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
			continue;

		/* scan areas before each reservation for intersection */
		for ( ; rsv_idx >= 0; rsv_idx--) {
			struct memblock_region *r = &rsv->regions[rsv_idx];
			phys_addr_t gap_start = rsv_idx ? r[-1].base + r[-1].size : 0;
			phys_addr_t gap_end = rsv_idx < rsv->cnt ? r->base : ULLONG_MAX;

			/* the gap lies below this memory region, advance it */
			if (gap_end <= m_start)
				break;
			/* the gap lies above this memory region, try the next */
			if (m_end <= gap_start)
				continue;

			/* the two regions intersect, we're done */
			if (out_start)
				*out_start = max(m_start, gap_start);
			if (out_end)
				*out_end = min(m_end, gap_end);
			if (out_nid)
				*out_nid = memblock_get_region_node(m);

			/* the one that starts last is stepped back */
			if (m_start >= gap_start)
				mem_idx--;
			else
				rsv_idx--;
			*idx = (u32)mem_idx | (u64)rsv_idx << 32;
			return;
		}
	}

	/* signal end of iteration */
	*idx = ULLONG_MAX;
}

Tejun Heo's avatar
Tejun Heo committed
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
 * __next_mem_pfn_range - advance to the next memory region holding whole pages
 * @idx: in/out cursor into memblock.memory; it is incremented before each
 *	 probe and set to -1 once no further region qualifies
 * @nid: node ID to match, or MAX_NUMNODES to accept every region
 * @out_start_pfn: optional; receives PFN_UP() of the region base
 * @out_end_pfn: optional; receives PFN_DOWN() of the region end
 * @out_nid: optional; receives the node ID stored in the region
 *
 * Backing iterator for for_each_mem_pfn_range().  A region whose rounded
 * start PFN is not below its rounded end PFN is skipped.
 */
void __init_memblock __next_mem_pfn_range(int *idx, int nid,
				unsigned long *out_start_pfn,
				unsigned long *out_end_pfn, int *out_nid)
{
	struct memblock_type *mem = &memblock.memory;
	struct memblock_region *rgn;

	for (;;) {
		++*idx;
		if (*idx >= mem->cnt) {
			/* ran off the end of the array: signal completion */
			*idx = -1;
			return;
		}

		rgn = &mem->regions[*idx];

		/* nothing page-sized left after rounding inwards */
		if (PFN_UP(rgn->base) >= PFN_DOWN(rgn->base + rgn->size))
			continue;

		if (nid == MAX_NUMNODES || nid == rgn->nid)
			break;
	}

	if (out_start_pfn)
		*out_start_pfn = PFN_UP(rgn->base);
	if (out_end_pfn)
		*out_end_pfn = PFN_DOWN(rgn->base + rgn->size);
	if (out_nid)
		*out_nid = rgn->nid;
}

/**
 * memblock_set_node - set node ID on memblock regions
 * @base: base of area to set node ID for
 * @size: size of area to set node ID for
 * @type: memblock type to set node ID for
 * @nid: node ID to set
 *
 * Set the nid of memblock @type regions in [@base,@base+@size) to @nid.
 * Regions which cross the area boundaries are split as necessary.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
				      struct memblock_type *type, int nid)
{
	int start_rgn, end_rgn;
	int i, ret;

	/* obtain the index window [start_rgn, end_rgn) covering the area */
	ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
	if (ret)
		return ret;

	for (i = start_rgn; i < end_rgn; i++)
		memblock_set_region_node(&type->regions[i], nid);

	/* let the type coalesce its regions again after the update */
	memblock_merge_regions(type);
	return 0;
}
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */

979
980
981
/*
 * memblock_alloc_base_nid - find and reserve a physical range
 * @size: number of bytes wanted; rounded up to a multiple of @align
 * @align: required alignment, SMP_CACHE_BYTES is used when 0
 * @max_addr: upper bound handed to memblock_find_in_range_node()
 * @nid: node ID handed to memblock_find_in_range_node()
 *
 * Returns the physical address of the reserved range, or 0 when no range
 * was found or the reservation failed.
 */
static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
					phys_addr_t align, phys_addr_t max_addr,
					int nid)
{
	phys_addr_t found;

	if (!align)
		align = SMP_CACHE_BYTES;

	/* align @size to avoid excessive fragmentation on reserved array */
	size = round_up(size, align);

	found = memblock_find_in_range_node(size, align, 0, max_addr, nid);
	/* hand the range out only if it could actually be reserved */
	if (found && !memblock_reserve(found, size))
		return found;

	return 0;
}

998
999
1000
1001
1002
1003
1004
/*
 * Node-specific allocation bounded by MEMBLOCK_ALLOC_ACCESSIBLE.
 * Returns whatever memblock_alloc_base_nid() returns (0 on failure).
 */
phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t addr;

	addr = memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE,
				       nid);
	return addr;
}

/*
 * Allocate below @max_addr without a node preference (NUMA_NO_NODE).
 * Returns the physical address, or 0 on failure; this variant does not panic.
 */
phys_addr_t __init __memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	return memblock_alloc_base_nid(size, align, max_addr, NUMA_NO_NODE);
}

1008
/*
 * Panicking wrapper around __memblock_alloc_base(): returns the physical
 * address of the allocation and calls panic() when the helper returns 0.
 */
phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr)
{
	phys_addr_t alloc;

	alloc = __memblock_alloc_base(size, align, max_addr);

	if (alloc == 0)
		panic("ERROR: Failed to allocate 0x%llx bytes below 0x%llx.\n",
		      (unsigned long long) size, (unsigned long long) max_addr);

	return alloc;
}

1021
/*
 * Allocate @size bytes aligned to @align with MEMBLOCK_ALLOC_ACCESSIBLE as
 * the upper bound; goes through memblock_alloc_base().
 */
phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
{
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}
Yinghai Lu's avatar
Yinghai Lu committed
1025

1026
1027
1028
1029
1030
1031
/*
 * Try to allocate on node @nid first; when that yields 0, retry through
 * memblock_alloc_base() with MEMBLOCK_ALLOC_ACCESSIBLE as the bound.
 */
phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
{
	phys_addr_t res = memblock_alloc_nid(size, align, nid);

	if (res)
		return res;
	/* node-specific attempt failed: fall back to the generic allocator */
	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
}

1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
/**
 * memblock_virt_alloc_internal - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region to allocate (phys address)
 * @max_addr: the upper bound of the memory region to allocate (phys address)
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * The @min_addr limit is dropped if it can not be satisfied and the allocation
 * will fall back to memory below @min_addr. Also, allocation may fall back
 * to any node in the system if the specified node can not
 * hold the requested memory.
 *
 * The allocation is performed from memory region limited by
 * memblock.current_limit if @max_addr == %BOOTMEM_ALLOC_ACCESSIBLE.
 *
 * The memory block is aligned on SMP_CACHE_BYTES if @align == 0.
 *
 * The phys address of allocated boot memory block is converted to virtual and
 * allocated memory is reset to 0.
 *
 * A candidate range is only handed out once memblock_reserve() succeeded
 * for it; a failed reservation is treated like a failed search and the
 * remaining fallbacks are tried.
 *
 * In addition, function sets the min_count to 0 using kmemleak_alloc for
 * allocated boot memory block, so that it is never reported as leaks.
 *
 * RETURNS:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
static void * __init memblock_virt_alloc_internal(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid)
{
	phys_addr_t alloc;
	void *ptr;

	if (nid == MAX_NUMNODES)
		pr_warn("%s: usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE\n",
			__func__);

	/*
	 * Detect any accidental use of these APIs after slab is ready, as at
	 * this moment memblock may be deinitialized already and its
	 * internal data may be destroyed (after execution of free_all_bootmem)
	 */
	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, nid);

	if (!align)
		align = SMP_CACHE_BYTES;

	/* align @size to avoid excessive fragmentation on reserved array */
	size = round_up(size, align);

again:
	alloc = memblock_find_in_range_node(size, align, min_addr, max_addr,
					    nid);
	/* a range that cannot be reserved must not be handed out */
	if (alloc && !memblock_reserve(alloc, size))
		goto done;

	/* the requested node could not serve us: try any node */
	if (nid != NUMA_NO_NODE) {
		alloc = memblock_find_in_range_node(size, align, min_addr,
						    max_addr,  NUMA_NO_NODE);
		if (alloc && !memblock_reserve(alloc, size))
			goto done;
	}

	/* last resort: drop the lower bound and start over once */
	if (min_addr) {
		min_addr = 0;
		goto again;
	}

	return NULL;

done:
	ptr = phys_to_virt(alloc);
	memset(ptr, 0, size);

	/*
	 * The min_count is set to 0 so that bootmem allocated blocks
	 * are never reported as leaks. This is because many of these blocks
	 * are only referred via the physical address which is not
	 * looked up by kmemleak.
	 */
	kmemleak_alloc(ptr, size, 0, 0);

	return ptr;
}

/**
 * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public, non-panicking front end of memblock_virt_alloc_internal().  It
 * emits a memblock_dbg() trace of the request (including the caller's
 * return address) and passes all arguments through unchanged.
 *
 * RETURNS:
 * Virtual address of allocated memory block on success, NULL on failure.
 */
void * __init memblock_virt_alloc_try_nid_nopanic(
				phys_addr_t size, phys_addr_t align,
				phys_addr_t min_addr, phys_addr_t max_addr,
				int nid)
{
	void *block;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
		     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
		     (u64)max_addr, (void *)_RET_IP_);

	block = memblock_virt_alloc_internal(size, align, min_addr,
					     max_addr, nid);
	return block;
}

/**
 * memblock_virt_alloc_try_nid - allocate boot memory block with panicking
 * @size: size of memory block to be allocated in bytes
 * @align: alignment of the region and block's size
 * @min_addr: the lower bound of the memory region from where the allocation
 *	  is preferred (phys address)
 * @max_addr: the upper bound of the memory region from where the allocation
 *	      is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
 *	      allocate only from memory limited by memblock.current_limit value
 * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
 *
 * Public, panicking front end of memblock_virt_alloc_internal().  It emits
 * a memblock_dbg() trace of the request (including the caller's return
 * address) and calls panic() when the request can not be satisfied.
 *
 * RETURNS:
 * Virtual address of allocated memory block; a failed request ends in
 * panic() instead of a return.
 */
void * __init memblock_virt_alloc_try_nid(
			phys_addr_t size, phys_addr_t align,
			phys_addr_t min_addr, phys_addr_t max_addr,
			int nid)
{
	void *block;

	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx %pF\n",
		     __func__, (u64)size, (u64)align, nid, (u64)min_addr,
		     (u64)max_addr, (void *)_RET_IP_);

	block = memblock_virt_alloc_internal(size, align,
					     min_addr, max_addr, nid);
	if (!block)
		panic("%s: Failed to allocate %llu bytes align=0x%llx nid=%d from=0x%llx max_addr=0x%llx\n",
		      __func__, (u64)size, (u64)align, nid, (u64)min_addr,
		      (u64)max_addr);

	return block;
}

/**
 * __memblock_free_early - free boot memory block
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * Drops [@base, @base + @size) from memblock.reserved and tells kmemleak
 * about it.  Meant for blocks obtained from the memblock_virt_alloc_xx()
 * API; the memory is not released to the buddy allocator.
 */
void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
{
	/* inclusive end address, used for the debug trace only */
	u64 last = (u64)base + size - 1;

	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
		     __func__, (u64)base, last,
		     (void *)_RET_IP_);

	kmemleak_free_part(__va(base), size);
	__memblock_remove(&memblock.reserved, base, size);
}

/*
 * __memblock_free_late - free bootmem block pages directly to buddy allocator
 * @base: phys starting address of the boot memory block
 * @size: size of the boot memory block in bytes
 *
 * This is only useful when the bootmem allocator has already been torn
 * down, but we are still initializing the system.  Pages are released directly
 * to the buddy allocator, no bootmem metadata is updated because it is gone.
 *
 * Only pages lying completely inside [@base, @base + @size) are released:
 * the start is rounded up and the end rounded down to a page frame.
 */
void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
{
	u64 pfn = PFN_UP(base);
	u64 stop_pfn = PFN_DOWN(base + size);

	memblock_dbg("%s: [%#016llx-%#016llx] %pF\n",
		     __func__, (u64)base, (u64)base + size - 1,
		     (void *)_RET_IP_);
	kmemleak_free_part(__va(base), size);

	while (pfn < stop_pfn) {
		__free_pages_bootmem(pfn_to_page(pfn), 0);
		totalram_pages++;
		pfn++;
	}
}
1237
1238
1239
1240
1241

/*
 * Remaining API functions
 */

1242
/* Return the total_size recorded for memblock.memory. */
phys_addr_t __init memblock_phys_mem_size(void)
{
	return memblock.memory.total_size;
}

Yinghai Lu's avatar
Yinghai Lu committed
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
/*
 * memblock_mem_size - amount of memory located below a page frame number
 * @limit_pfn: page frame number at which every memory region is cut off
 *
 * Adds up the page-frame span of each memory region after clamping both
 * ends to @limit_pfn and returns the sum converted to bytes.
 */
phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
{
	struct memblock_region *rgn;
	unsigned long nr_pages = 0;

	for_each_memblock(memory, rgn) {
		unsigned long first = memblock_region_memory_base_pfn(rgn);
		unsigned long last = memblock_region_memory_end_pfn(rgn);

		/* clamp both ends so regions above the limit add nothing */
		if (first > limit_pfn)
			first = limit_pfn;
		if (last > limit_pfn)
			last = limit_pfn;

		nr_pages += last - first;
	}

	return (phys_addr_t)nr_pages << PAGE_SHIFT;
}

1264
1265
1266
1267
1268
1269
/* lowest address: base of the first entry of memblock.memory */
phys_addr_t __init_memblock memblock_start_of_DRAM(void)
{
	struct memblock_region *first = &memblock.memory.regions[0];

	return first->base;
}

1270
/* End address (base + size) of the last entry of memblock.memory. */
phys_addr_t __init_memblock memblock_end_of_DRAM(void)
{
	int idx = memblock.memory.cnt - 1;

	return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}

1277
/*
 * memblock_enforce_memory_limit - cut memory and reserved regions at a limit
 * @limit: amount of memory (in bytes) to keep; 0 means nothing is done
 *
 * @limit is consumed region by region in array order, so it counts bytes of
 * memory rather than naming an address.  The address at which the budget
 * runs out becomes the cut-off, and everything from there upwards is removed
 * from both memblock.memory and memblock.reserved.
 */
void __init memblock_enforce_memory_limit(phys_addr_t limit)
{
	unsigned long i;
	phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;

	if (!limit)
		return;

	/* find out max address */
	for (i = 0; i < memblock.memory.cnt; i++) {
		struct memblock_region *r = &memblock.memory.regions[i];

		/* the remaining budget ends inside this region */
		if (limit <= r->size) {
			max_addr = r->base + limit;
			break;
		}
		limit -= r->size;
	}

	/* truncate both memory and reserved regions */
	__memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
	__memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
}

1301
/*
 * memblock_search - binary search for the region containing an address
 * @type: memblock type whose regions array is searched
 * @addr: physical address to look up
 *
 * Returns the index of the region with base <= @addr < base + size, or -1
 * when no region contains @addr.  The halving search is only meaningful if
 * the regions are ordered by base address; the first probe also happens
 * before any bounds test, so @type->cnt is expected to be non-zero.
 */
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
	unsigned int left = 0, right = type->cnt;

	do {
		unsigned int mid = (right + left) / 2;

		if (addr < type->regions[mid].base)
			right = mid;
		else if (addr >= (type->regions[mid].base +
				  type->regions[mid].size))
			left = mid + 1;
		else
			return mid;
	} while (left < right);
	return -1;
}

1319
/* Non-zero when @addr lies inside one of the memblock.reserved regions. */
int __init memblock_is_reserved(phys_addr_t addr)
{
	return memblock_search(&memblock.reserved, addr) != -1;
}
Yinghai Lu's avatar
Yinghai Lu committed
1323

1324
/* Non-zero when @addr lies inside one of the memblock.memory regions. */
int __init_memblock memblock_is_memory(phys_addr_t addr)
{
	return memblock_search(&memblock.memory, addr) != -1;
}

1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
 * memblock_search_pfn_nid - look up the memory region holding a page frame
 * @pfn: page frame number to look up
 * @start_pfn: receives the region base shifted down by PAGE_SHIFT
 * @end_pfn: receives the region end shifted down by PAGE_SHIFT
 *
 * Returns the node ID stored in the matching region, or -1 (leaving both
 * outputs untouched) when no memory region contains @pfn.
 */
int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
			 unsigned long *start_pfn, unsigned long *end_pfn)
{
	struct memblock_type *mem = &memblock.memory;
	struct memblock_region *rgn;
	int found;

	found = memblock_search(mem, (phys_addr_t)pfn << PAGE_SHIFT);
	if (found == -1)
		return -1;

	rgn = &mem->regions[found];
	*start_pfn = rgn->base >> PAGE_SHIFT;
	*end_pfn = (rgn->base + rgn->size) >> PAGE_SHIFT;

	return rgn->nid;
}
#endif

1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
/**
 * memblock_is_region_memory - check if a region is a subset of memory
 * @base: base of region to check
 * @size: size of region to check
 *
 * Check if the region [@base, @base+@size) is a subset of a memory block.
 *
 * RETURNS:
 * 0 if false, non-zero if true
 */
1357
int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
1358
{
1359
	int idx = memblock_search(&memblock.memory, base);
1360
	phys_addr_t end = base + memblock_cap_size(base, &size);
1361
1362
1363

	if (idx == -1)
		return 0;
1364
1365
	return memblock.memory.regions[idx].base <= base &&
		(memblock.memory.regions[idx].base +
1366
		 memblock.memory.regions[idx].size) >= end;
Yinghai Lu's avatar
Yinghai Lu committed
1367
1368
}

1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
/**
 * memblock_is_region_reserved - check if a region intersects reserved memory
 * @base: base of region to check
 * @size: size of region to check
 *
 * Check if the region [@base, @base+@size) intersects a reserved memory block.
 *
 * RETURNS:
 * 0 if false, non-zero if true
 */
1379
int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
Yinghai Lu's avatar
Yinghai Lu committed
1380
{