Commit 385386cf, authored by Johannes Weiner, committed by Linus Torvalds
Browse files

mm: vmstat: move slab statistics from zone to node counters

Patch series "mm: per-lruvec slab stats"

Josef is working on a new approach to balancing slab caches and the page
cache.  For this to work, he needs slab cache statistics on the lruvec
level.  These patches implement that by adding infrastructure that
allows updating and reading generic VM stat items per lruvec, then
switches some existing VM accounting sites, including the slab
accounting ones, to this new cgroup-aware API.

I'll follow up with more patches on this, because there is actually
substantial simplification that can be done to the memory controller
when we replace private memcg accounting with making the existing VM
accounting sites cgroup-aware.  But this is enough for Josef to base his
slab reclaim work on, so here goes.

This patch (of 5):

To re-implement slab cache vs. page cache balancing, we'll need the
slab counters at the lruvec level, which, ever since lru reclaim was
moved from the zone to the node, is the intersection of the node, not
the zone, and the memcg.

We could retain the per-zone counters for when the page allocator dumps
its memory information on failures, and have counters on both levels -
which on all but NUMA node 0 is usually redundant.  But let's keep it
simple for now and just move them.  If anybody complains we can restore
the per-zone counters.

[hannes@cmpxchg.org: fix oops]
  Link: http://lkml.kernel.org/r/20170605183511.GA8915@cmpxchg.org
Link: http://lkml.kernel.org/r/20170530181724.27197-3-hannes@cmpxchg.org


Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 2b2695f5
...@@ -129,11 +129,11 @@ static ssize_t node_read_meminfo(struct device *dev, ...@@ -129,11 +129,11 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE) + nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) +
sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
nid, K(sum_zone_node_page_state(nid, NR_SLAB_RECLAIMABLE)), nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)),
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)), nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)),
nid, K(node_page_state(pgdat, NR_ANON_THPS) * nid, K(node_page_state(pgdat, NR_ANON_THPS) *
HPAGE_PMD_NR), HPAGE_PMD_NR),
nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
...@@ -141,7 +141,7 @@ static ssize_t node_read_meminfo(struct device *dev, ...@@ -141,7 +141,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
HPAGE_PMD_NR)); HPAGE_PMD_NR));
#else #else
nid, K(sum_zone_node_page_state(nid, NR_SLAB_UNRECLAIMABLE))); nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)));
#endif #endif
n += hugetlb_report_node_meminfo(nid, buf + n); n += hugetlb_report_node_meminfo(nid, buf + n);
return n; return n;
......
...@@ -125,8 +125,6 @@ enum zone_stat_item { ...@@ -125,8 +125,6 @@ enum zone_stat_item {
NR_ZONE_UNEVICTABLE, NR_ZONE_UNEVICTABLE,
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */ NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */ NR_KERNEL_STACK_KB, /* measured in KiB */
/* Second 128 byte cacheline */ /* Second 128 byte cacheline */
...@@ -152,6 +150,8 @@ enum node_stat_item { ...@@ -152,6 +150,8 @@ enum node_stat_item {
NR_INACTIVE_FILE, /* " " " " " */ NR_INACTIVE_FILE, /* " " " " " */
NR_ACTIVE_FILE, /* " " " " " */ NR_ACTIVE_FILE, /* " " " " " */
NR_UNEVICTABLE, /* " " " " " */ NR_UNEVICTABLE, /* " " " " " */
NR_SLAB_RECLAIMABLE,
NR_SLAB_UNRECLAIMABLE,
NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
WORKINGSET_REFAULT, WORKINGSET_REFAULT,
......
...@@ -4643,8 +4643,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) ...@@ -4643,8 +4643,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" present:%lukB" " present:%lukB"
" managed:%lukB" " managed:%lukB"
" mlocked:%lukB" " mlocked:%lukB"
" slab_reclaimable:%lukB"
" slab_unreclaimable:%lukB"
" kernel_stack:%lukB" " kernel_stack:%lukB"
" pagetables:%lukB" " pagetables:%lukB"
" bounce:%lukB" " bounce:%lukB"
...@@ -4666,8 +4664,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) ...@@ -4666,8 +4664,6 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone->present_pages), K(zone->present_pages),
K(zone->managed_pages), K(zone->managed_pages),
K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_MLOCK)),
K(zone_page_state(zone, NR_SLAB_RECLAIMABLE)),
K(zone_page_state(zone, NR_SLAB_UNRECLAIMABLE)),
zone_page_state(zone, NR_KERNEL_STACK_KB), zone_page_state(zone, NR_KERNEL_STACK_KB),
K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_BOUNCE)),
...@@ -5153,6 +5149,7 @@ static void build_zonelists(pg_data_t *pgdat) ...@@ -5153,6 +5149,7 @@ static void build_zonelists(pg_data_t *pgdat)
*/ */
static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch); static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset); static DEFINE_PER_CPU(struct per_cpu_pageset, boot_pageset);
static DEFINE_PER_CPU(struct per_cpu_nodestat, boot_nodestats);
static void setup_zone_pageset(struct zone *zone); static void setup_zone_pageset(struct zone *zone);
/* /*
...@@ -6053,6 +6050,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat) ...@@ -6053,6 +6050,8 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
spin_lock_init(&pgdat->lru_lock); spin_lock_init(&pgdat->lru_lock);
lruvec_init(node_lruvec(pgdat)); lruvec_init(node_lruvec(pgdat));
pgdat->per_cpu_nodestats = &boot_nodestats;
for (j = 0; j < MAX_NR_ZONES; j++) { for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j; struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize, freesize, memmap_pages; unsigned long size, realsize, freesize, memmap_pages;
......
...@@ -1425,10 +1425,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, ...@@ -1425,10 +1425,10 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
nr_pages = (1 << cachep->gfporder); nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT) if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
add_zone_page_state(page_zone(page), add_node_page_state(page_pgdat(page),
NR_SLAB_RECLAIMABLE, nr_pages); NR_SLAB_RECLAIMABLE, nr_pages);
else else
add_zone_page_state(page_zone(page), add_node_page_state(page_pgdat(page),
NR_SLAB_UNRECLAIMABLE, nr_pages); NR_SLAB_UNRECLAIMABLE, nr_pages);
__SetPageSlab(page); __SetPageSlab(page);
...@@ -1459,10 +1459,10 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page) ...@@ -1459,10 +1459,10 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
kmemcheck_free_shadow(page, order); kmemcheck_free_shadow(page, order);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT) if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
sub_zone_page_state(page_zone(page), sub_node_page_state(page_pgdat(page),
NR_SLAB_RECLAIMABLE, nr_freed); NR_SLAB_RECLAIMABLE, nr_freed);
else else
sub_zone_page_state(page_zone(page), sub_node_page_state(page_pgdat(page),
NR_SLAB_UNRECLAIMABLE, nr_freed); NR_SLAB_UNRECLAIMABLE, nr_freed);
BUG_ON(!PageSlab(page)); BUG_ON(!PageSlab(page));
......
...@@ -1615,7 +1615,7 @@ out: ...@@ -1615,7 +1615,7 @@ out:
if (!page) if (!page)
return NULL; return NULL;
mod_zone_page_state(page_zone(page), mod_node_page_state(page_pgdat(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ? (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1 << oo_order(oo)); 1 << oo_order(oo));
...@@ -1655,7 +1655,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) ...@@ -1655,7 +1655,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
kmemcheck_free_shadow(page, compound_order(page)); kmemcheck_free_shadow(page, compound_order(page));
mod_zone_page_state(page_zone(page), mod_node_page_state(page_pgdat(page),
(s->flags & SLAB_RECLAIM_ACCOUNT) ? (s->flags & SLAB_RECLAIM_ACCOUNT) ?
NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
-pages); -pages);
......
...@@ -3874,7 +3874,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) ...@@ -3874,7 +3874,7 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
* unmapped file backed pages. * unmapped file backed pages.
*/ */
if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages && if (node_pagecache_reclaimable(pgdat) <= pgdat->min_unmapped_pages &&
sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) node_page_state(pgdat, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
return NODE_RECLAIM_FULL; return NODE_RECLAIM_FULL;
/* /*
......
...@@ -928,8 +928,6 @@ const char * const vmstat_text[] = { ...@@ -928,8 +928,6 @@ const char * const vmstat_text[] = {
"nr_zone_unevictable", "nr_zone_unevictable",
"nr_zone_write_pending", "nr_zone_write_pending",
"nr_mlock", "nr_mlock",
"nr_slab_reclaimable",
"nr_slab_unreclaimable",
"nr_page_table_pages", "nr_page_table_pages",
"nr_kernel_stack", "nr_kernel_stack",
"nr_bounce", "nr_bounce",
...@@ -952,6 +950,8 @@ const char * const vmstat_text[] = { ...@@ -952,6 +950,8 @@ const char * const vmstat_text[] = {
"nr_inactive_file", "nr_inactive_file",
"nr_active_file", "nr_active_file",
"nr_unevictable", "nr_unevictable",
"nr_slab_reclaimable",
"nr_slab_unreclaimable",
"nr_isolated_anon", "nr_isolated_anon",
"nr_isolated_file", "nr_isolated_file",
"workingset_refault", "workingset_refault",
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment