Commit d0dc12e8, authored by Pavel Tatashin, committed by Linus Torvalds
Browse files

mm/memory_hotplug: optimize memory hotplug

During memory hotplugging we traverse struct pages three times:

1. memset(0) in sparse_add_one_section()
2. loop in __add_section() to do: set_page_node(page, nid); and SetPageReserved(page);
3. loop in memmap_init_zone() to call __init_single_pfn()

This patch removes the first two loops, and leaves only loop 3.  All
struct pages are initialized in one place, the same as it is done during boot.

The benefits:

 - We improve memory hotplug performance because we are not evicting the
   cache several times and also reduce loop branching overhead.

 - Remove condition from hotpath in __init_single_pfn(), that was added
   in order to fix the problem that was reported by Bharata in the above
   email thread, thus also improve performance during normal boot.

 - Make memory hotplug more similar to the boot memory initialization
   path because we zero and initialize struct pages only in one place.

 - Simplifies memory hotplug struct page initialization code, and thus
   enables future improvements, such as multi-threading the
   initialization of struct pages in order to improve hotplug
   performance even further on larger machines.

[ v5]

Signed-off-by: Pavel Tatashin <>
Reviewed-by: Ingo Molnar <>
Cc: Michal Hocko <>
Cc: Baoquan He <>
Cc: Bharata B Rao <>
Cc: Daniel Jordan <>
Cc: Dan Williams <>
Cc: Greg Kroah-Hartman <>
Cc: "H. Peter Anvin" <>
Cc: Kirill A. Shutemov <>
Cc: Mel Gorman <>
Cc: Steven Sistare <>
Cc: Thomas Gleixner <>
Cc: Vlastimil Babka <>
Signed-off-by: Andrew Morton <>
Signed-off-by: Linus Torvalds <>
parent fc44f7f9
......@@ -407,6 +407,8 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, int nid,
if (!mem_blk)
return -EFAULT;
mem_blk->nid = nid;
if (!node_online(nid))
return 0;
......@@ -33,6 +33,7 @@ struct memory_block {
void *hw; /* optional pointer to fw/hw data */
int (*phys_callback)(struct memory_block *);
struct device dev;
int nid; /* NID for this memory block */
int arch_get_memory_phys_device(unsigned long start_pfn);
......@@ -250,7 +250,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
struct vmem_altmap *altmap, bool want_memblock)
int ret;
int i;
if (pfn_valid(phys_start_pfn))
return -EEXIST;
......@@ -259,23 +258,6 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
if (ret < 0)
return ret;
* Make all the pages reserved so that nobody will stumble over half
* initialized state.
* FIXME: We also have to associate it with a node because page_to_nid
* relies on having page with the proper node.
for (i = 0; i < PAGES_PER_SECTION; i++) {
unsigned long pfn = phys_start_pfn + i;
struct page *page;
if (!pfn_valid(pfn))
page = pfn_to_page(pfn);
set_page_node(page, nid);
if (!want_memblock)
return 0;
......@@ -908,8 +890,15 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
int nid;
int ret;
struct memory_notify arg;
struct memory_block *mem;
* We can't use pfn_to_nid() because nid might be stored in struct page
* which is not yet initialized. Instead, we find nid from memory block.
mem = find_memory_block(__pfn_to_section(pfn));
nid = mem->nid;
nid = pfn_to_nid(pfn);
/* associate pfn range with the zone */
zone = move_pfn_range(online_type, nid, pfn, nr_pages);
......@@ -1143,10 +1143,9 @@ static void free_one_page(struct zone *zone,
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid, bool zero)
unsigned long zone, int nid)
if (zero)
set_page_links(page, zone, nid, pfn);
......@@ -1160,12 +1159,6 @@ static void __meminit __init_single_page(struct page *page, unsigned long pfn,
static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
int nid, bool zero)
return __init_single_page(pfn_to_page(pfn), pfn, zone, nid, zero);
static void __meminit init_reserved_page(unsigned long pfn)
......@@ -1184,7 +1177,7 @@ static void __meminit init_reserved_page(unsigned long pfn)
if (pfn >= zone->zone_start_pfn && pfn < zone_end_pfn(zone))
__init_single_pfn(pfn, zid, nid, true);
__init_single_page(pfn_to_page(pfn), pfn, zid, nid);
static inline void init_reserved_page(unsigned long pfn)
......@@ -1501,7 +1494,7 @@ static unsigned long __init deferred_init_pages(int nid, int zid,
} else {
__init_single_page(page, pfn, zid, nid, true);
__init_single_page(page, pfn, zid, nid);
return (nr_pages);
......@@ -5434,6 +5427,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long pfn;
unsigned long nr_initialised = 0;
struct page *page;
struct memblock_region *r = NULL, *tmp;
......@@ -5486,6 +5480,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid);
if (context == MEMMAP_HOTPLUG)
* Mark the block movable so that blocks are reserved for
* movable at startup. This will force kernel allocations
......@@ -5502,15 +5501,8 @@ not_early:
* because this is done early in sparse_add_one_section
if (!(pfn & (pageblock_nr_pages - 1))) {
struct page *page = pfn_to_page(pfn);
__init_single_page(page, pfn, zone, nid,
context != MEMMAP_HOTPLUG);
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
} else {
__init_single_pfn(pfn, zone, nid,
context != MEMMAP_HOTPLUG);
......@@ -779,7 +779,13 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
goto out;
memset(memmap, 0, sizeof(struct page) * PAGES_PER_SECTION);
* Poison uninitialized struct pages in order to catch invalid flags
* combinations.
memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment