/*
 * Slab allocator functions that are independent of the allocator strategy
 *
 * (C) 2012 Christoph Lameter <cl@linux.com>
 */
#include <linux/slab.h>

#include <linux/mm.h>
#include <linux/poison.h>
#include <linux/interrupt.h>
#include <linux/memory.h>
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
#include <linux/memcontrol.h>

#define CREATE_TRACE_POINTS
#include <trace/events/kmem.h>

#include "slab.h"

enum slab_state slab_state;
LIST_HEAD(slab_caches);
DEFINE_MUTEX(slab_mutex);
struct kmem_cache *kmem_cache;

/*
 * Set of flags that will prevent slab merging
 */
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
		SLAB_FAILSLAB)

#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)

/*
 * Merge control. If this is set then no merging of slab caches will occur.
 * (Could be removed. This was introduced to pacify the merge skeptics.)
 */
static int slab_nomerge;

static int __init setup_slab_nomerge(char *str)
{
	slab_nomerge = 1;
	return 1;
}

#ifdef CONFIG_SLUB
__setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
#endif

__setup("slab_nomerge", setup_slab_nomerge);

/*
 * Determine the size of a slab object
 */
unsigned int kmem_cache_size(struct kmem_cache *s)
{
	return s->object_size;
}
EXPORT_SYMBOL(kmem_cache_size);

#ifdef CONFIG_DEBUG_VM
static int kmem_cache_sanity_check(const char *name, size_t size)
{
	struct kmem_cache *s = NULL;

	if (!name || in_interrupt() || size < sizeof(void *) ||
		size > KMALLOC_MAX_SIZE) {
		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
		return -EINVAL;
	}

	list_for_each_entry(s, &slab_caches, list) {
		char tmp;
		int res;

		/*
		 * This happens when the module gets unloaded and doesn't
		 * destroy its slab cache and no-one else reuses the vmalloc
		 * area of the module.  Print a warning.
		 */
		res = probe_kernel_address(s->name, tmp);
		if (res) {
			pr_err("Slab cache with size %d has lost its name\n",
			       s->object_size);
			continue;
		}
	}

	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
	return 0;
}
#else
static inline int kmem_cache_sanity_check(const char *name, size_t size)
{
	return 0;
}
#endif

void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
{
	size_t i;

	for (i = 0; i < nr; i++)
		kmem_cache_free(s, p[i]);
}

bool __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
								void **p)
{
	size_t i;

	for (i = 0; i < nr; i++) {
		void *x = p[i] = kmem_cache_alloc(s, flags);
		if (!x) {
			__kmem_cache_free_bulk(s, i, p);
			return false;
		}
	}
	return true;
}
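/*
 * Usage sketch (illustrative, not part of this file): these helpers are the
 * generic fallbacks behind the public bulk API. A caller fills an array of
 * object pointers in one call and frees them the same way; "foo_cachep" and
 * the batch size are hypothetical, and the exact return convention of
 * kmem_cache_alloc_bulk() has varied between kernel versions.
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(foo_cachep, GFP_KERNEL, ARRAY_SIZE(objs), objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cachep, ARRAY_SIZE(objs), objs);
 */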

#ifdef CONFIG_MEMCG_KMEM
void slab_init_memcg_params(struct kmem_cache *s)
{
	s->memcg_params.is_root_cache = true;
	INIT_LIST_HEAD(&s->memcg_params.list);
	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
}

static int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct memcg_cache_array *arr;

	if (memcg) {
		s->memcg_params.is_root_cache = false;
		s->memcg_params.memcg = memcg;
		s->memcg_params.root_cache = root_cache;
		return 0;
	}

	slab_init_memcg_params(s);

	if (!memcg_nr_cache_ids)
		return 0;

	arr = kzalloc(sizeof(struct memcg_cache_array) +
		      memcg_nr_cache_ids * sizeof(void *),
		      GFP_KERNEL);
	if (!arr)
		return -ENOMEM;

	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
	return 0;
}

static void destroy_memcg_params(struct kmem_cache *s)
{
	if (is_root_cache(s))
		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
}

static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
	struct memcg_cache_array *old, *new;

	if (!is_root_cache(s))
		return 0;

	new = kzalloc(sizeof(struct memcg_cache_array) +
		      new_array_size * sizeof(void *), GFP_KERNEL);
	if (!new)
		return -ENOMEM;

	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	if (old)
		memcpy(new->entries, old->entries,
		       memcg_nr_cache_ids * sizeof(void *));

	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}

int memcg_update_all_caches(int num_memcgs)
{
	struct kmem_cache *s;
	int ret = 0;

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		ret = update_memcg_params(s, num_memcgs);
		/*
		 * Instead of freeing the memory, we'll just leave the caches
		 * up to this point in an updated state.
		 */
		if (ret)
			break;
	}
	mutex_unlock(&slab_mutex);
	return ret;
}
#else
static inline int init_memcg_params(struct kmem_cache *s,
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	return 0;
}

static inline void destroy_memcg_params(struct kmem_cache *s)
{
}
#endif /* CONFIG_MEMCG_KMEM */

/*
 * Find a mergeable slab cache
 */
int slab_unmergeable(struct kmem_cache *s)
{
	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
		return 1;

	if (!is_root_cache(s))
		return 1;

	if (s->ctor)
		return 1;

	/*
	 * We may have set a slab to be unmergeable during bootstrap.
	 */
	if (s->refcount < 0)
		return 1;

	return 0;
}

struct kmem_cache *find_mergeable(size_t size, size_t align,
		unsigned long flags, const char *name, void (*ctor)(void *))
{
	struct kmem_cache *s;

	if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
		return NULL;

	if (ctor)
		return NULL;

	size = ALIGN(size, sizeof(void *));
	align = calculate_alignment(flags, align, size);
	size = ALIGN(size, align);
	flags = kmem_cache_flags(size, flags, name, NULL);

	list_for_each_entry_reverse(s, &slab_caches, list) {
		if (slab_unmergeable(s))
			continue;

		if (size > s->size)
			continue;

		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
			continue;
		/*
		 * Check if alignment is compatible.
		 * Courtesy of Adrian Drzewiecki
		 */
		if ((s->size & ~(align - 1)) != s->size)
			continue;

		if (s->size - size >= sizeof(void *))
			continue;

		if (IS_ENABLED(CONFIG_SLAB) && align &&
			(align > s->align || s->align % align))
			continue;

		return s;
	}
	return NULL;
}
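/*
 * Illustrative example (assumed scenario, not part of this file): with
 * merging enabled, two caches that end up with the same effective object
 * size, compatible SLAB_MERGE_SAME flags and no constructor may share one
 * kmem_cache. The names below are hypothetical.
 *
 *	a = kmem_cache_create("foo_cache", 48, 0, 0, NULL);
 *	b = kmem_cache_create("bar_cache", 44, 0, 0, NULL);
 *
 * 44 rounds up to 48 after pointer alignment, so find_mergeable() can hand
 * back the existing cache and "bar_cache" becomes an alias of "foo_cache".
 */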

/*
 * Figure out what the alignment of the objects will be given a set of
 * flags, a user specified alignment and the size of the objects.
 */
unsigned long calculate_alignment(unsigned long flags,
		unsigned long align, unsigned long size)
{
	/*
	 * If the user wants hardware cache aligned objects then follow that
	 * suggestion if the object is sufficiently large.
	 *
	 * The hardware cache alignment cannot override the specified
	 * alignment though. If that is greater, use it.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();
		while (size <= ralign / 2)
			ralign /= 2;
		align = max(align, ralign);
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	return ALIGN(align, sizeof(void *));
}
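/*
 * Worked example (illustrative): with SLAB_HWCACHE_ALIGN, a 64-byte cache
 * line and a 24-byte object, ralign is halved once (24 <= 32) and the loop
 * stops there (24 > 16), so the object is given 32-byte alignment unless
 * the caller asked for more. The final ALIGN() keeps the result a multiple
 * of sizeof(void *).
 */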

static struct kmem_cache *create_cache(const char *name,
		size_t object_size, size_t size, size_t align,
		unsigned long flags, void (*ctor)(void *),
		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
{
	struct kmem_cache *s;
	int err;

	err = -ENOMEM;
	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
	if (!s)
		goto out;

	s->name = name;
	s->object_size = object_size;
	s->size = size;
	s->align = align;
	s->ctor = ctor;

	err = init_memcg_params(s, memcg, root_cache);
	if (err)
		goto out_free_cache;

	err = __kmem_cache_create(s, flags);
	if (err)
		goto out_free_cache;

	s->refcount = 1;
	list_add(&s->list, &slab_caches);
out:
	if (err)
		return ERR_PTR(err);
	return s;

out_free_cache:
	destroy_memcg_params(s);
	kmem_cache_free(kmem_cache, s);
	goto out;
}

/*
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline.  This can be beneficial if you're counting cycles as closely
 * as davem.
 */
struct kmem_cache *
kmem_cache_create(const char *name, size_t size, size_t align,
		  unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;
	const char *cache_name;
	int err;

	get_online_cpus();
	get_online_mems();
	memcg_get_cache_ids();

	mutex_lock(&slab_mutex);

	err = kmem_cache_sanity_check(name, size);
	if (err) {
		goto out_unlock;
	}

	/*
	 * Some allocators will constrain the set of valid flags to a subset
	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
	 * case, and we'll just provide them with a sanitized version of the
	 * passed flags.
	 */
	flags &= CACHE_CREATE_MASK;

	s = __kmem_cache_alias(name, size, align, flags, ctor);
	if (s)
		goto out_unlock;

	cache_name = kstrdup_const(name, GFP_KERNEL);
	if (!cache_name) {
		err = -ENOMEM;
		goto out_unlock;
	}

	s = create_cache(cache_name, size, size,
			 calculate_alignment(flags, align, size),
			 flags, ctor, NULL, NULL);
	if (IS_ERR(s)) {
		err = PTR_ERR(s);
		kfree_const(cache_name);
	}

out_unlock:
	mutex_unlock(&slab_mutex);

	memcg_put_cache_ids();
	put_online_mems();
	put_online_cpus();

	if (err) {
		if (flags & SLAB_PANIC)
			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
				name, err);
		else {
			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d",
				name, err);
			dump_stack();
		}
		return NULL;
	}
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
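/*
 * Usage sketch (illustrative, not part of this file): a typical caller
 * creates its cache once at init time, allocates and frees objects from it,
 * and destroys it on teardown. "struct foo" and "foo_cachep" are
 * hypothetical.
 *
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cachep)
 *		return -ENOMEM;
 *
 *	struct foo *f = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	...
 *	kmem_cache_free(foo_cachep, f);
 *	kmem_cache_destroy(foo_cachep);
 */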

static int shutdown_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	if (__kmem_cache_shutdown(s) != 0)
		return -EBUSY;

	if (s->flags & SLAB_DESTROY_BY_RCU)
		*need_rcu_barrier = true;

	list_move(&s->list, release);
	return 0;
}

static void release_caches(struct list_head *release, bool need_rcu_barrier)
{
	struct kmem_cache *s, *s2;

	if (need_rcu_barrier)
		rcu_barrier();

	list_for_each_entry_safe(s, s2, release, list) {
#ifdef SLAB_SUPPORTS_SYSFS
		sysfs_slab_remove(s);
#else
		slab_kmem_cache_release(s);
#endif
	}
}

#ifdef CONFIG_MEMCG_KMEM
/*
 * memcg_create_kmem_cache - Create a cache for a memory cgroup.
 * @memcg: The memory cgroup the new cache is for.
 * @root_cache: The parent of the new cache.
 *
 * This function attempts to create a kmem cache that will serve allocation
 * requests going from @memcg to @root_cache. The new cache inherits properties
 * from its parent.
 */
void memcg_create_kmem_cache(struct mem_cgroup *memcg,
			     struct kmem_cache *root_cache)
{
	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
	struct cgroup_subsys_state *css = &memcg->css;
	struct memcg_cache_array *arr;
	struct kmem_cache *s = NULL;
	char *cache_name;
	int idx;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	/*
	 * The memory cgroup could have been deactivated while the cache
	 * creation work was pending.
	 */
	if (!memcg_kmem_is_active(memcg))
		goto out_unlock;

	idx = memcg_cache_id(memcg);
	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));

	/*
	 * Since per-memcg caches are created asynchronously on first
	 * allocation (see memcg_kmem_get_cache()), several threads can try to
	 * create the same cache, but only one of them may succeed.
	 */
	if (arr->entries[idx])
		goto out_unlock;

	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
			       css->id, memcg_name_buf);
	if (!cache_name)
		goto out_unlock;

	s = create_cache(cache_name, root_cache->object_size,
			 root_cache->size, root_cache->align,
			 root_cache->flags, root_cache->ctor,
			 memcg, root_cache);
	/*
	 * If we could not create a memcg cache, do not complain, because
	 * that's not critical at all as we can always proceed with the root
	 * cache.
	 */
	if (IS_ERR(s)) {
		kfree(cache_name);
		goto out_unlock;
	}

	list_add(&s->memcg_params.list, &root_cache->memcg_params.list);

	/*
	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
	 * barrier here to ensure nobody will see the kmem_cache partially
	 * initialized.
	 */
	smp_wmb();
	arr->entries[idx] = s;

out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
{
	int idx;
	struct memcg_cache_array *arr;
	struct kmem_cache *s, *c;

	idx = memcg_cache_id(memcg);

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry(s, &slab_caches, list) {
		if (!is_root_cache(s))
			continue;

		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
						lockdep_is_held(&slab_mutex));
		c = arr->entries[idx];
		if (!c)
			continue;

		__kmem_cache_shrink(c, true);
		arr->entries[idx] = NULL;
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();
}

static int __shutdown_memcg_cache(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	BUG_ON(is_root_cache(s));

	if (shutdown_cache(s, release, need_rcu_barrier))
		return -EBUSY;

	list_del(&s->memcg_params.list);
	return 0;
}

void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	struct kmem_cache *s, *s2;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);
	list_for_each_entry_safe(s, s2, &slab_caches, list) {
		if (is_root_cache(s) || s->memcg_params.memcg != memcg)
			continue;
		/*
		 * The cgroup is about to be freed and therefore has no charges
		 * left. Hence, all its caches must be empty by now.
		 */
		BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
	}
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}

static int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	struct memcg_cache_array *arr;
	struct kmem_cache *c, *c2;
	LIST_HEAD(busy);
	int i;

	BUG_ON(!is_root_cache(s));

	/*
	 * First, shutdown active caches, i.e. caches that belong to online
	 * memory cgroups.
	 */
	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
					lockdep_is_held(&slab_mutex));
	for_each_memcg_cache_index(i) {
		c = arr->entries[i];
		if (!c)
			continue;
		if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
			/*
			 * The cache still has objects. Move it to a temporary
			 * list so as not to try to destroy it for a second
			 * time while iterating over inactive caches below.
			 */
			list_move(&c->memcg_params.list, &busy);
		else
			/*
			 * The cache is empty and will be destroyed soon. Clear
			 * the pointer to it in the memcg_caches array so that
			 * it will never be accessed even if the root cache
			 * stays alive.
			 */
			arr->entries[i] = NULL;
	}

	/*
	 * Second, shutdown all caches left from memory cgroups that are now
	 * offline.
	 */
	list_for_each_entry_safe(c, c2, &s->memcg_params.list,
				 memcg_params.list)
		__shutdown_memcg_cache(c, release, need_rcu_barrier);

	list_splice(&busy, &s->memcg_params.list);

	/*
	 * A cache being destroyed must be empty. In particular, this means
	 * that all per memcg caches attached to it must be empty too.
	 */
	if (!list_empty(&s->memcg_params.list))
		return -EBUSY;
	return 0;
}
#else
static inline int shutdown_memcg_caches(struct kmem_cache *s,
		struct list_head *release, bool *need_rcu_barrier)
{
	return 0;
}
#endif /* CONFIG_MEMCG_KMEM */

void slab_kmem_cache_release(struct kmem_cache *s)
{
	destroy_memcg_params(s);
	kfree_const(s->name);
	kmem_cache_free(kmem_cache, s);
}

void kmem_cache_destroy(struct kmem_cache *s)
{
	LIST_HEAD(release);
	bool need_rcu_barrier = false;
	int err;

	if (unlikely(!s))
		return;

	get_online_cpus();
	get_online_mems();

	mutex_lock(&slab_mutex);

	s->refcount--;
	if (s->refcount)
		goto out_unlock;

	err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
	if (!err)
		err = shutdown_cache(s, &release, &need_rcu_barrier);

	if (err) {
		pr_err("kmem_cache_destroy %s: "
		       "Slab cache still has objects\n", s->name);
		dump_stack();
	}
out_unlock:
	mutex_unlock(&slab_mutex);

	put_online_mems();
	put_online_cpus();

	release_caches(&release, need_rcu_barrier);
}
EXPORT_SYMBOL(kmem_cache_destroy);

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Releases as many slabs as possible for a cache.
 * To help debugging, a zero exit status indicates all slabs were released.
 */
int kmem_cache_shrink(struct kmem_cache *cachep)
{
	int ret;

	get_online_cpus();
	get_online_mems();
	ret = __kmem_cache_shrink(cachep, false);
	put_online_mems();
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL(kmem_cache_shrink);
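/*
 * Usage sketch (illustrative, not part of this file): a subsystem that has
 * just released a large number of objects can ask for empty slabs to be
 * returned to the page allocator; "foo_cachep" is hypothetical.
 *
 *	if (kmem_cache_shrink(foo_cachep))
 *		pr_debug("foo cache still holds partially used slabs\n");
 */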

bool slab_is_available(void)
{
	return slab_state >= UP;
}

#ifndef CONFIG_SLOB
/* Create a cache during boot when no slab services are available yet */
void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
		unsigned long flags)
{
	int err;

	s->name = name;
	s->size = s->object_size = size;
	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);

	slab_init_memcg_params(s);

	err = __kmem_cache_create(s, flags);

	if (err)
		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
					name, size, err);

	s->refcount = -1;	/* Exempt from merging for now */
}

struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
				unsigned long flags)
{
	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);

	if (!s)
		panic("Out of memory when creating slab %s\n", name);

	create_boot_cache(s, name, size, flags);
	list_add(&s->list, &slab_caches);
	s->refcount = 1;
	return s;
}

struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_caches);

#ifdef CONFIG_ZONE_DMA
struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
EXPORT_SYMBOL(kmalloc_dma_caches);
#endif

/*
 * Conversion table for small slab sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have
 * non-power-of-two cache sizes there. The size of larger slabs can be
 * determined using fls.
 */
static s8 size_index[24] = {
	3,	/* 8 */
	4,	/* 16 */
	5,	/* 24 */
	5,	/* 32 */
	6,	/* 40 */
	6,	/* 48 */
	6,	/* 56 */
	6,	/* 64 */
	1,	/* 72 */
	1,	/* 80 */
	1,	/* 88 */
	1,	/* 96 */
	7,	/* 104 */
	7,	/* 112 */
	7,	/* 120 */
	7,	/* 128 */
	2,	/* 136 */
	2,	/* 144 */
	2,	/* 152 */
	2,	/* 160 */
	2,	/* 168 */
	2,	/* 176 */
	2,	/* 184 */
	2	/* 192 */
};

static inline int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

/*
 * Find the kmem_cache structure that serves a given size of
 * allocation
 */
struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
{
	int index;

	if (unlikely(size > KMALLOC_MAX_SIZE)) {
		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
		return NULL;
	}

	if (size <= 192) {
		if (!size)
			return ZERO_SIZE_PTR;

		index = size_index[size_index_elem(size)];
	} else
		index = fls(size - 1);

#ifdef CONFIG_ZONE_DMA
	if (unlikely((flags & GFP_DMA)))
		return kmalloc_dma_caches[index];

#endif
	return kmalloc_caches[index];
}
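/*
 * Worked example (illustrative, assuming the default size_index table): a
 * 40-byte request is <= 192, so it goes through the table:
 * size_index_elem(40) = 4 and size_index[4] = 6, i.e. kmalloc-64. A
 * 1000-byte request uses fls(999) = 10, i.e. kmalloc-1024. With GFP_DMA and
 * CONFIG_ZONE_DMA the same index picks the dma-kmalloc-* counterpart.
 */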

/*
 * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
 * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
 * kmalloc-67108864.
 */
static struct {
	const char *name;
	unsigned long size;
} const kmalloc_info[] __initconst = {
	{NULL,                      0},		{"kmalloc-96",             96},
	{"kmalloc-192",           192},		{"kmalloc-8",               8},
	{"kmalloc-16",             16},		{"kmalloc-32",             32},
	{"kmalloc-64",             64},		{"kmalloc-128",           128},
	{"kmalloc-256",           256},		{"kmalloc-512",           512},
	{"kmalloc-1024",         1024},		{"kmalloc-2048",         2048},
	{"kmalloc-4096",         4096},		{"kmalloc-8192",         8192},
	{"kmalloc-16384",       16384},		{"kmalloc-32768",       32768},
	{"kmalloc-65536",       65536},		{"kmalloc-131072",     131072},
	{"kmalloc-262144",     262144},		{"kmalloc-524288",     524288},
	{"kmalloc-1048576",   1048576},		{"kmalloc-2097152",   2097152},
	{"kmalloc-4194304",   4194304},		{"kmalloc-8388608",   8388608},
	{"kmalloc-16777216", 16777216},		{"kmalloc-33554432", 33554432},
	{"kmalloc-67108864", 67108864}
};

/*
 * Patch up the size_index table if we have strange large alignment
 * requirements for the kmalloc array. This is only the case for
 * MIPS it seems. The standard arches will not generate any code here.
 *
 * Largest permitted alignment is 256 bytes due to the way we
 * handle the index determination for the smaller caches.
 *
 * Make sure that nothing crazy happens if someone starts tinkering
 * around with ARCH_KMALLOC_MINALIGN
 */
void __init setup_kmalloc_cache_index_table(void)
{
	int i;

	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));

	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
		int elem = size_index_elem(i);

		if (elem >= ARRAY_SIZE(size_index))
			break;
		size_index[elem] = KMALLOC_SHIFT_LOW;
	}

	if (KMALLOC_MIN_SIZE >= 64) {
		/*
		 * The 96 byte size cache is not used if the alignment
		 * is 64 bytes.
		 */
		for (i = 64 + 8; i <= 96; i += 8)
			size_index[size_index_elem(i)] = 7;

	}

	if (KMALLOC_MIN_SIZE >= 128) {
		/*
		 * The 192 byte sized cache is not used if the alignment
		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
		 * instead.
		 */
		for (i = 128 + 8; i <= 192; i += 8)
			size_index[size_index_elem(i)] = 8;
	}
}
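/*
 * Worked example (illustrative): on a configuration where KMALLOC_MIN_SIZE
 * is 64, the loops above rewrite the table so that requests of 8..64 bytes
 * map to kmalloc-64 (index KMALLOC_SHIFT_LOW) and 72..96 bytes map to
 * kmalloc-128 (index 7), since the sub-64-byte and 96-byte caches are never
 * created in that configuration.
 */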

static void __init new_kmalloc_cache(int idx, unsigned long flags)
{
	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
					kmalloc_info[idx].size, flags);
}

/*
 * Create the kmalloc array. Some of the regular kmalloc arrays
 * may already have been created because they were needed to
 * enable allocations for slab creation.
 */
void __init create_kmalloc_caches(unsigned long flags)
{
	int i;

	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
		if (!kmalloc_caches[i])
			new_kmalloc_cache(i, flags);

		/*
		 * Caches that are not of the two-to-the-power-of size.
		 * These have to be created immediately after the
		 * earlier power of two caches
		 */
		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
			new_kmalloc_cache(1, flags);
		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
			new_kmalloc_cache(2, flags);
	}

	/* Kmalloc array is now usable */
	slab_state = UP;

#ifdef CONFIG_ZONE_DMA
	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
		struct kmem_cache *s = kmalloc_caches[i];

		if (s) {
			int size = kmalloc_size(i);
			char *n = kasprintf(GFP_NOWAIT,
				 "dma-kmalloc-%d", size);

			BUG_ON(!n);
			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
				size, SLAB_CACHE_DMA | flags);
		}
	}
#endif
}
#endif /* !CONFIG_SLOB */

/*
 * To avoid unnecessary overhead, we pass through large allocation requests
 * directly to the page allocator. We use __GFP_COMP, because we will need to
 * know the allocation order to free the pages properly in kfree.
 */