Commit c61afb18, authored by Paul Jackson, committed by Linus Torvalds
Browse files

[PATCH] cpuset memory spread slab cache optimizations

The hooks in the slab cache allocator code path for support of NUMA
mempolicies and cpuset memory spreading are in an important code path.  Many
systems will use neither feature.

This patch optimizes those hooks down to a single check of some bits in the
current task's task_struct flags.  For non-NUMA systems, this hook and related
code are already ifdef'd out.

The optimization is done by using another task flag, set if the task is using
a non-default NUMA mempolicy.  Taking this flag bit along with the
PF_SPREAD_PAGE and PF_SPREAD_SLAB flag bits added earlier in this 'cpuset
memory spreading' patch set, one can check for the combination of any of these
special-case memory placement mechanisms with a single test of the current
task's task_struct flags.

This patch also tightens up the code, to save a few bytes of kernel text
space, and moves some of it out of line.  Due to the nested inlines called
from multiple places, we were ending up with three copies of this code, which,
once we get off the main code path (for local node allocation), seems a bit
wasteful of instruction memory.

Signed-off-by: Paul Jackson
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
parent 101a5001
......@@ -147,6 +147,7 @@ extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
extern void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
extern void mpol_fix_fork_child_flag(struct task_struct *p);
#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
......@@ -248,6 +249,10 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
#define set_cpuset_being_rebound(x) do {} while (0)
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
......@@ -932,6 +932,7 @@ static inline void put_task_struct(struct task_struct *t)
#define PF_SWAPWRITE 0x01000000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x04000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x08000000 /* Spread some slab caches over cpuset */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
* Only the _current_ task can read/write to tsk->flags, but other
......@@ -1021,6 +1021,7 @@ static task_t *copy_process(unsigned long clone_flags,
p->mempolicy = NULL;
goto bad_fork_cleanup_cpuset;
......@@ -422,6 +422,37 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
return mpol_check_policy(mode, nodes);
* Update task->flags PF_MEMPOLICY bit: set iff non-default
* mempolicy. Allows more rapid checking of this (combined perhaps
* with other PF_* flag bits) on memory allocation hot code paths.
* If called from outside this file, the task 'p' should -only- be
* a newly forked child not yet visible on the task list, because
* manipulating the task flags of a visible task is not safe.
* The above limitation is why this routine has the funny name
* mpol_fix_fork_child_flag().
* It is also safe to call this with a task pointer of current,
* which the static wrapper mpol_set_task_struct_flag() does,
* for use within this file.
void mpol_fix_fork_child_flag(struct task_struct *p)
if (p->mempolicy)
p->flags |= PF_MEMPOLICY;
p->flags &= ~PF_MEMPOLICY;
static void mpol_set_task_struct_flag(void)
/* Set the process memory policy */
long do_set_mempolicy(int mode, nodemask_t *nodes)
......@@ -434,6 +465,7 @@ long do_set_mempolicy(int mode, nodemask_t *nodes)
return PTR_ERR(new);
current->mempolicy = new;
if (new && new->policy == MPOL_INTERLEAVE)
current->il_next = first_node(new->v.nodes);
return 0;
......@@ -899,6 +899,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
static struct array_cache **alloc_alien_cache(int node, int limit)
......@@ -2808,19 +2809,11 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
struct array_cache *ac;
if (unlikely(current->mempolicy && !in_interrupt())) {
int nid = slab_node(current->mempolicy);
if (nid != numa_node_id())
return __cache_alloc_node(cachep, flags, nid);
if (unlikely(cpuset_do_slab_mem_spread() &&
(cachep->flags & SLAB_MEM_SPREAD) &&
!in_interrupt())) {
int nid = cpuset_mem_spread_node();
if (nid != numa_node_id())
return __cache_alloc_node(cachep, flags, nid);
if (unlikely(current->flags & (PF_SPREAD_PAGE | PF_SPREAD_SLAB |
objp = alternate_node_alloc(cachep, flags);
if (objp != NULL)
return objp;
......@@ -2855,6 +2848,28 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
* Try allocating on another node if PF_SPREAD_PAGE|PF_SPREAD_SLAB|PF_MEMPOLICY.
* If we are in_interrupt, then process context, including cpusets and
* mempolicy, may not apply and should not be used for allocation policy.
static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
int nid_alloc, nid_here;
if (in_interrupt())
return NULL;
nid_alloc = nid_here = numa_node_id();
if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
nid_alloc = cpuset_mem_spread_node();
else if (current->mempolicy)
nid_alloc = slab_node(current->mempolicy);
if (nid_alloc != nid_here)
return __cache_alloc_node(cachep, flags, nid_alloc);
return NULL;
* An interface to enable slab creation on nodeid
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment