/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

static LIST_HEAD(slab_caches_to_rcu_destroy);
static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
		    slab_caches_to_rcu_destroy_workfn);

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB | SLAB_KASAN)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
			 SLAB_NOTRACK | SLAB_ACCOUNT)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slab_nomerge;

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = 1;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);
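
/*
 * Usage note: booting with "slab_nomerge" on the kernel command line (or
 * "slub_nomerge" when SLUB is the active allocator) runs
 * setup_slab_nomerge() and disables all cache merging below.
 */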

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
		size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module.  Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		if (s)
			kmem_cache_free(s, p[i]);
		else
			kfree(p[i]);
	}
}

int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
								void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return 0;
		}
	}
	return i;
}
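
/*
 * Illustrative sketch (not part of this file) of how a caller might drive
 * the bulk interface that the generic helpers above back; "example_cache"
 * and the object count are placeholders.
 */
#if 0
static void bulk_alloc_example(struct kmem_cache *example_cache)
{
	void *objs[16];

	/* A failed bulk allocation returns 0 and leaves nothing to free. */
	if (!kmem_cache_alloc_bulk(example_cache, GFP_KERNEL,
				   ARRAY_SIZE(objs), objs))
		return;

	/* ... use objs[0] .. objs[15] ... */

	kmem_cache_free_bulk(example_cache, ARRAY_SIZE(objs), objs);
}
#endif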

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)

LIST_HEAD(slab_root_caches);

void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.root_cache = NULL;
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
	INIT_LIST_HEAD(&s->memcg_params.children);
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (root_cache) {
		s->memcg_params.root_cache = root_cache;
		s->memcg_params.memcg = memcg;
		INIT_LIST_HEAD(&s->memcg_params.children_node);
		INIT_LIST_HEAD(&s->memcg_params.kmem_caches_node);
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kzalloc(sizeof(struct memcg_cache_array) +
		      memcg_nr_cache_ids * sizeof(void *),
		      GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	new = kzalloc(sizeof(struct memcg_cache_array) +
		      new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}

void memcg_link_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_add(&s->root_caches_node, &slab_root_caches);
	} else {
		list_add(&s->memcg_params.children_node,
			 &s->memcg_params.root_cache->memcg_params.children);
		list_add(&s->memcg_params.kmem_caches_node,
			 &s->memcg_params.memcg->kmem_caches);
	}
}

static void memcg_unlink_cache(struct kmem_cache *s)
{
	if (is_root_cache(s)) {
		list_del(&s->root_caches_node);
	} else {
		list_del(&s->memcg_params.children_node);
		list_del(&s->memcg_params.kmem_caches_node);
	}
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}

static inline void memcg_unlink_cache(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(size_t size, size_t align,
		unsigned long flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge)
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	if (flags & SLAB_NEVER_MERGE)
		return NULL;

	list_for_each_entry_reverse(s, &slab_root_caches, root_caches_node) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}
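
/*
 * Worked example: a request for 60-byte objects with no constructor and no
 * debug flags is padded to 64 bytes above, so an existing compatible 64-byte
 * root cache (e.g. kmalloc-64 under SLUB) passes every check and is reused
 * instead of creating a new cache.
 */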

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, then use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
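
/*
 * Example: with SLAB_HWCACHE_ALIGN and 64-byte cache lines, a 20-byte object
 * halves ralign from 64 to 32 (20 <= 32, but 20 > 16) and ends up 32-byte
 * aligned, while a 100-byte object keeps the full 64-byte alignment.
 */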

static struct kmem_cache *create_cache(const char *name,
		size_t object_size, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->object_size = object_size;
	s->size = size;
	s->align = align;
	s->ctor = ctor;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err) {
		goto out_unlock;
	}

	/* Refuse requests with allocator specific flags */
	if (flags & ~SLAB_FLAGS_PERMITTED) {
		err = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size, size,
			 calculate_alignment(flags, align, size),
			 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			pr_warn("kmem_cache_create(%s) failed with error %d\n",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
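
/*
 * Illustrative sketch of the usual life cycle around kmem_cache_create();
 * the structure and cache names are placeholders, not part of this file.
 */
#if 0
struct example_obj {
	int id;
	char payload[56];
};

static struct kmem_cache *example_cachep;

static int example_init(void)
{
	example_cachep = kmem_cache_create("example_obj",
					   sizeof(struct example_obj), 0,
					   SLAB_HWCACHE_ALIGN, NULL);
	return example_cachep ? 0 : -ENOMEM;
}

static void example_use(void)
{
	struct example_obj *obj = kmem_cache_alloc(example_cachep, GFP_KERNEL);

	if (!obj)
		return;
	obj->id = 1;
	kmem_cache_free(example_cachep, obj);
}

static void example_exit(void)
{
	kmem_cache_destroy(example_cachep);
}
#endif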

static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
{
	LIST_HEAD(to_destroy);
	struct kmem_cache *s, *s2;

	/*
	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the
	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed
	 * through RCU and the associated kmem_cache are dereferenced
	 * while freeing the pages, so the kmem_caches should be freed only
	 * after the pending RCU operations are finished.  As rcu_barrier()
	 * is a pretty slow operation, we batch all pending destructions
	 * asynchronously.
	 */
	mutex_lock(&slab_mutex);
	list_splice_init(&slab_caches_to_rcu_destroy, &to_destroy);
	mutex_unlock(&slab_mutex);

	if (list_empty(&to_destroy))
		return;

	rcu_barrier();

	list_for_each_entry_safe(s, s2, &to_destroy, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

static int shutdown_cache(struct kmem_cache *s)
{
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	memcg_unlink_cache(s);
	list_del(&s->list);

	if (s->flags & SLAB_DESTROY_BY_RCU) {
		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
		schedule_work(&slab_caches_to_rcu_destroy_work);
	} else {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_release(s);
#else
		slab_kmem_cache_release(s);
#endif
	}

	return 0;
}

#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been offlined while the cache
	 * creation work was pending.
	 */
	if (memcg->kmem_state != KMEM_ONLINE)
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%llu:%s)", root_cache->name,
			       css->serial_nr, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->size, root_cache->align,
			 root_cache->flags & CACHE_CREATE_MASK,
			 root_cache->ctor, memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * barrier here to ensure nobody will see the kmem_cache partially
	 * initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static void kmemcg_deactivate_workfn(struct work_struct *work)
{
	struct kmem_cache *s = container_of(work, struct kmem_cache,
					    memcg_params.deact_work);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->memcg_params.deact_fn(s);

	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
	css_put(&s->memcg_params.memcg->css);
}

static void kmemcg_deactivate_rcufn(struct rcu_head *head)
{
	struct kmem_cache *s = container_of(head, struct kmem_cache,
					    memcg_params.deact_rcu_head);

	/*
	 * We need to grab blocking locks.  Bounce to ->deact_work.  The
	 * work item shares the space with the RCU head and can't be
	 * initialized earlier.
	 */
	INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
	schedule_work(&s->memcg_params.deact_work);
}

/**
 * slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
 *					   sched RCU grace period
 * @s: target kmem_cache
 * @deact_fn: deactivation function to call
 *
 * Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
 * held after a sched RCU grace period.  The slab is guaranteed to stay
 * alive until @deact_fn is finished.  This is to be used from
 * __kmemcg_cache_deactivate().
 */
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
					   void (*deact_fn)(struct kmem_cache *))
{
	if (WARN_ON_ONCE(is_root_cache(s)) ||
	    WARN_ON_ONCE(s->memcg_params.deact_fn))
		return;

	/* pin memcg so that @s doesn't get destroyed in the middle */
	css_get(&s->memcg_params.memcg->css);

	s->memcg_params.deact_fn = deact_fn;
	call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_root_caches, root_caches_node) {
		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmemcg_cache_deactivate(c);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &memcg->kmem_caches,
				 memcg_params.kmem_caches_node) {
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(shutdown_cache(s));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int shutdown_memcg_caches(struct kmem_cache *s)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (shutdown_cache(c))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.children_node, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.children,
				 memcg_params.children_node)
		shutdown_cache(c);

	list_splice(&busy, &s->memcg_params.children);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.children))
		return -EBUSY;
	return 0;
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s)
{
	return 0;
}
#endif /* CONFIG_MEMCG && !CONFIG_SLOB */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	__kmem_cache_release(s);
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	int err;

	if (unlikely(!s))
		return;

	get_online_cpus();
	get_online_mems();

	kasan_cache_destroy(s);
	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s);
	if (!err)
		err = shutdown_cache(s);

	if (err) {
		pr_err("kmem_cache_destroy %s: Slab cache still has objects\n",
		       s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	kasan_cache_shrink(cachep);
	ret = __kmem_cache_shrink(cachep);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	memcg_link_cache(s);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
 * of two cache sizes there. The size of larger slabs can be determined using
 * fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
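
/*
 * Worked examples: a 100-byte request is <= 192 and maps through
 * size_index[(100 - 1) / 8] = size_index[12] = 7, i.e. kmalloc-128;
 * a 1000-byte request maps through fls(999) = 10, i.e. kmalloc-1024.
 */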

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
const struct kmalloc_info_struct kmalloc_info[] __initconst = {
	{NULL,                      0},		{"kmalloc-96",             96},
	{"kmalloc-192",           192},		{"kmalloc-8",               8},
	{"kmalloc-16",             16},		{"kmalloc-32",             32},
	{"kmalloc-64",             64},		{"kmalloc-128",           128},
	{"kmalloc-256",           256},		{"kmalloc-512",           512},
	{"kmalloc-1024",         1024},		{"kmalloc-2048",         2048},
	{"kmalloc-4096",         4096},		{"kmalloc-8192",         8192},
	{"kmalloc-16384",       16384},		{"kmalloc-32768",       32768},
	{"kmalloc-65536",       65536},		{"kmalloc-131072",     131072},
	{"kmalloc-262144",     262144},		{"kmalloc-524288",     524288},
	{"kmalloc-1048576",   1048576},		{"kmalloc-2097152",   2097152},
	{"kmalloc-4194304",   4194304},		{"kmalloc-8388608",   8388608},
	{"kmalloc-16777216", 16777216},		{"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}