/*
 * zsmalloc memory allocator
 *
 * Copyright (C) 2011  Nitin Gupta
 * Copyright (C) 2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the license that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 */

/*
 * Following is how we use various fields and flags of underlying
 * struct page(s) to form a zspage.
 *
 * Usage of struct page fields:
 *	page->private: points to zspage
 *	page->freelist(index): links together all component pages of a zspage
 *		For the huge page, this is always 0, so we use this field
 *		to store handle.
 *
 * Usage of struct page flags:
 *	PG_private: identifies the first component page
 *	PG_private2: identifies the last component page
 *	PG_owner_priv_1: identifies the huge component page
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/highmem.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/debugfs.h>
#include <linux/zsmalloc.h>
#include <linux/zpool.h>
#include <linux/mount.h>
#include <linux/migrate.h>
#include <linux/pagemap.h>

#define ZSPAGE_MAGIC	0x58

/*
 * This must be a power of 2 and greater than or equal to sizeof(link_free).
 * These two conditions ensure that any 'struct link_free' itself doesn't
 * span more than 1 page which avoids complex case of mapping 2 pages simply
 * to restore link_free pointer values.
 */
#define ZS_ALIGN		8

/*
 * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
 * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
 */
#define ZS_MAX_ZSPAGE_ORDER 2
#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)

#define ZS_HANDLE_SIZE (sizeof(unsigned long))

/*
 * Object location (<PFN>, <obj_idx>) is encoded as
 * a single (unsigned long) handle value.
 *
 * Note that object index <obj_idx> starts from 0.
 *
 * This is made more complicated by various memory models and PAE.
 */

#ifndef MAX_PHYSMEM_BITS
#ifdef CONFIG_HIGHMEM64G
#define MAX_PHYSMEM_BITS 36
#else /* !CONFIG_HIGHMEM64G */
/*
 * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
 * be PAGE_SHIFT
 */
#define MAX_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
#define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)

/*
 * The memory allocated for a handle stores the object position by
 * encoding <page, obj_idx>, and the encoded value has spare room
 * in its least significant bit (see obj_to_location).
 * We use that bit to synchronize object access between the user
 * and migration.
 */
#define HANDLE_PIN_BIT	0

/*
 * The head of an allocated object carries OBJ_ALLOCATED_TAG so we can
 * tell whether the object has been allocated or not.
 * It's okay to put this status bit in the least significant bit because
 * the header stores a handle, which is a 4-byte-aligned address, so we
 * have room for at least two bits.
 */
#define OBJ_ALLOCATED_TAG 1
#define OBJ_TAG_BITS 1
#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
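
/*
 * Illustrative layout (assuming a 64-bit machine with 4K pages and the
 * default MAX_PHYSMEM_BITS == BITS_PER_LONG, so _PFN_BITS == 52,
 * OBJ_TAG_BITS == 1 and OBJ_INDEX_BITS == 11): the encoded value built
 * by location_to_obj() below looks like
 *
 *	63            12 11            1   0
 *	+---------------+---------------+-----+
 *	|      PFN      |    obj_idx    | tag |
 *	+---------------+---------------+-----+
 *
 * i.e. obj = ((pfn << OBJ_INDEX_BITS) | obj_idx) << OBJ_TAG_BITS.
 */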

#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
#define ZS_MIN_ALLOC_SIZE \
	MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
/* each chunk includes extra space to keep handle */
#define ZS_MAX_ALLOC_SIZE	PAGE_SIZE

/*
 * On systems with 4K page size, this gives 255 size classes! There is a
 * trade-off here:
 *  - Large number of size classes is potentially wasteful as free pages are
 *    spread across these classes
 *  - Small number of size classes causes large internal fragmentation
 *  - Probably it's better to use specific size classes (empirically
 *    determined). NOTE: all those class sizes must be set as multiple of
 *    ZS_ALIGN to make sure link_free itself never has to span 2 pages.
 *
 *  ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
 *  (reason above)
 */
#define ZS_SIZE_CLASS_DELTA	(PAGE_SIZE >> CLASS_BITS)
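
/*
 * Worked example (assuming 4K pages, CLASS_BITS == 8 and
 * ZS_MIN_ALLOC_SIZE == 32): ZS_SIZE_CLASS_DELTA = 4096 >> 8 = 16 bytes,
 * so class sizes run 32, 48, 64, ... up to ZS_MAX_ALLOC_SIZE == 4096,
 * giving (4096 - 32) / 16 + 1 = 255 size classes, which is where the
 * "255 size classes" figure above comes from.
 */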

/*
 * We do not maintain any list for completely empty or full pages
 */
enum fullness_group {
	ZS_EMPTY,
	ZS_ALMOST_EMPTY,
	ZS_ALMOST_FULL,
	ZS_FULL,
	NR_ZS_FULLNESS,
};

enum zs_stat_type {
	CLASS_EMPTY,
	CLASS_ALMOST_EMPTY,
	CLASS_ALMOST_FULL,
	CLASS_FULL,
	OBJ_ALLOCATED,
	OBJ_USED,
	NR_ZS_STAT_TYPE,
};

struct zs_size_stat {
	unsigned long objs[NR_ZS_STAT_TYPE];
};

#ifdef CONFIG_ZSMALLOC_STAT
static struct dentry *zs_stat_root;
#endif

#ifdef CONFIG_COMPACTION
static struct vfsmount *zsmalloc_mnt;
#endif

/*
 * number of size_classes
 */
static int zs_size_classes;

/*
 * We assign a page to ZS_ALMOST_EMPTY fullness group when:
 *	n <= 3 * N / f, where
 * n = number of allocated objects
 * N = total number of objects zspage can store
 * f = fullness_threshold_frac
 *
 * Similarly, we assign zspage to:
 *	ZS_ALMOST_FULL	when n > 3 * N / f
 *	ZS_EMPTY	when n == 0
 *	ZS_FULL		when n == N
 *
 * (see: fix_fullness_group())
 */
static const int fullness_threshold_frac = 4;
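
/*
 * Worked example: for a class with objs_per_zspage == 8 and f == 4,
 * get_fullness_group() yields ZS_EMPTY for n == 0, ZS_ALMOST_EMPTY for
 * 1 <= n <= 6 (n <= 3 * 8 / 4), ZS_ALMOST_FULL for n == 7 and ZS_FULL
 * for n == 8.
 */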

struct size_class {
	spinlock_t lock;
	struct list_head fullness_list[NR_ZS_FULLNESS];
	/*
	 * Size of objects stored in this class. Must be multiple
	 * of ZS_ALIGN.
	 */
	int size;
	int objs_per_zspage;
	/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
	int pages_per_zspage;

	unsigned int index;
	struct zs_size_stat stats;
};

/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
static void SetPageHugeObject(struct page *page)
{
	SetPageOwnerPriv1(page);
}

static void ClearPageHugeObject(struct page *page)
{
	ClearPageOwnerPriv1(page);
}

static int PageHugeObject(struct page *page)
{
	return PageOwnerPriv1(page);
}

/*
 * Placed within free objects to form a singly linked list.
 * For every zspage, zspage->freeobj gives head of this list.
 *
 * This must be a power of 2 and less than or equal to ZS_ALIGN
 */
struct link_free {
	union {
		/*
		 * Free object index;
		 * It's valid for non-allocated object
		 */
		unsigned long next;
		/*
		 * Handle of allocated object.
		 */
		unsigned long handle;
	};
};

struct zs_pool {
	const char *name;

	struct size_class **size_class;
	struct kmem_cache *handle_cachep;
	struct kmem_cache *zspage_cachep;

	atomic_long_t pages_allocated;

	struct zs_pool_stats stats;

	/* Compact classes */
	struct shrinker shrinker;
	/*
	 * To signify that register_shrinker() was successful
	 * and unregister_shrinker() will not Oops.
	 */
	bool shrinker_enabled;
#ifdef CONFIG_ZSMALLOC_STAT
	struct dentry *stat_dentry;
#endif
#ifdef CONFIG_COMPACTION
	struct inode *inode;
	struct work_struct free_work;
#endif
};

/*
 * A zspage's class index and fullness group
 * are stored in the bitfields of struct zspage below.
 */
#define FULLNESS_BITS	2
#define CLASS_BITS	8
#define ISOLATED_BITS	3
#define MAGIC_VAL_BITS	8

struct zspage {
	struct {
		unsigned int fullness:FULLNESS_BITS;
		unsigned int class:CLASS_BITS;
		unsigned int isolated:ISOLATED_BITS;
		unsigned int magic:MAGIC_VAL_BITS;
	};
	unsigned int inuse;
	unsigned int freeobj;
	struct page *first_page;
	struct list_head list; /* fullness list */
#ifdef CONFIG_COMPACTION
	rwlock_t lock;
#endif
};

struct mapping_area {
#ifdef CONFIG_PGTABLE_MAPPING
	struct vm_struct *vm; /* vm area for mapping object that span pages */
#else
	char *vm_buf; /* copy buffer for objects that span pages */
#endif
	char *vm_addr; /* address of kmap_atomic()'ed pages */
	enum zs_mapmode vm_mm; /* mapping mode */
};

#ifdef CONFIG_COMPACTION
static int zs_register_migration(struct zs_pool *pool);
static void zs_unregister_migration(struct zs_pool *pool);
static void migrate_lock_init(struct zspage *zspage);
static void migrate_read_lock(struct zspage *zspage);
static void migrate_read_unlock(struct zspage *zspage);
static void kick_deferred_free(struct zs_pool *pool);
static void init_deferred_free(struct zs_pool *pool);
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
#else
static int zsmalloc_mount(void) { return 0; }
static void zsmalloc_unmount(void) {}
static int zs_register_migration(struct zs_pool *pool) { return 0; }
static void zs_unregister_migration(struct zs_pool *pool) {}
static void migrate_lock_init(struct zspage *zspage) {}
static void migrate_read_lock(struct zspage *zspage) {}
static void migrate_read_unlock(struct zspage *zspage) {}
static void kick_deferred_free(struct zs_pool *pool) {}
static void init_deferred_free(struct zs_pool *pool) {}
static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
#endif

static int create_cache(struct zs_pool *pool)
{
	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
					0, 0, NULL);
	if (!pool->handle_cachep)
		return 1;

	pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
					0, 0, NULL);
	if (!pool->zspage_cachep) {
		kmem_cache_destroy(pool->handle_cachep);
		pool->handle_cachep = NULL;
		return 1;
	}

	return 0;
}

static void destroy_cache(struct zs_pool *pool)
{
	kmem_cache_destroy(pool->handle_cachep);
	kmem_cache_destroy(pool->zspage_cachep);
}

static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
{
	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
}

static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
{
	kmem_cache_free(pool->handle_cachep, (void *)handle);
}

static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
{
	return kmem_cache_alloc(pool->zspage_cachep,
			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
}

static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
{
	kmem_cache_free(pool->zspage_cachep, zspage);
}

static void record_obj(unsigned long handle, unsigned long obj)
{
	/*
	 * The lsb of @obj represents the handle lock while the other bits
	 * represent the object value the handle points to, so the update
	 * must not suffer store tearing.
	 */
	WRITE_ONCE(*(unsigned long *)handle, obj);
}

/* zpool driver */

#ifdef CONFIG_ZPOOL

static void *zs_zpool_create(const char *name, gfp_t gfp,
			     const struct zpool_ops *zpool_ops,
			     struct zpool *zpool)
{
	/*
	 * Ignore global gfp flags: zs_malloc() may be invoked from
	 * different contexts and its caller must provide a valid
	 * gfp mask.
	 */
	return zs_create_pool(name);
}

static void zs_zpool_destroy(void *pool)
{
	zs_destroy_pool(pool);
}

static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	*handle = zs_malloc(pool, size, gfp);
	return *handle ? 0 : -1;
}
static void zs_zpool_free(void *pool, unsigned long handle)
{
	zs_free(pool, handle);
}

static int zs_zpool_shrink(void *pool, unsigned int pages,
			unsigned int *reclaimed)
{
	return -EINVAL;
}

static void *zs_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	enum zs_mapmode zs_mm;

	switch (mm) {
	case ZPOOL_MM_RO:
		zs_mm = ZS_MM_RO;
		break;
	case ZPOOL_MM_WO:
		zs_mm = ZS_MM_WO;
		break;
	case ZPOOL_MM_RW: /* fallthru */
	default:
		zs_mm = ZS_MM_RW;
		break;
	}

	return zs_map_object(pool, handle, zs_mm);
}
static void zs_zpool_unmap(void *pool, unsigned long handle)
{
	zs_unmap_object(pool, handle);
}

static u64 zs_zpool_total_size(void *pool)
{
	return zs_get_total_pages(pool) << PAGE_SHIFT;
}

static struct zpool_driver zs_zpool_driver = {
	.type =		"zsmalloc",
	.owner =	THIS_MODULE,
	.create =	zs_zpool_create,
	.destroy =	zs_zpool_destroy,
	.malloc =	zs_zpool_malloc,
	.free =		zs_zpool_free,
	.shrink =	zs_zpool_shrink,
	.map =		zs_zpool_map,
	.unmap =	zs_zpool_unmap,
	.total_size =	zs_zpool_total_size,
};

MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
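
/*
 * Minimal usage sketch (illustrative only; error handling omitted and
 * "src" is an assumed caller-provided buffer) of the public API declared
 * in <linux/zsmalloc.h>, as driven by users such as zram:
 *
 *	struct zs_pool *pool = zs_create_pool("example");
 *	unsigned long handle = zs_malloc(pool, 128, GFP_KERNEL);
 *	void *dst = zs_map_object(pool, handle, ZS_MM_WO);
 *
 *	memcpy(dst, src, 128);
 *	zs_unmap_object(pool, handle);
 *	...
 *	zs_free(pool, handle);
 *	zs_destroy_pool(pool);
 */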

/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);

static bool is_zspage_isolated(struct zspage *zspage)
{
	return zspage->isolated;
}

static int is_first_page(struct page *page)
{
	return PagePrivate(page);
}

/* Protected by class->lock */
static inline int get_zspage_inuse(struct zspage *zspage)
{
	return zspage->inuse;
}

static inline void set_zspage_inuse(struct zspage *zspage, int val)
{
	zspage->inuse = val;
}

static inline void mod_zspage_inuse(struct zspage *zspage, int val)
{
	zspage->inuse += val;
}

static inline struct page *get_first_page(struct zspage *zspage)
{
	struct page *first_page = zspage->first_page;

	VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
	return first_page;
}

static inline int get_first_obj_offset(struct page *page)
{
	return page->units;
}

static inline void set_first_obj_offset(struct page *page, int offset)
{
	page->units = offset;
}

static inline unsigned int get_freeobj(struct zspage *zspage)
{
	return zspage->freeobj;
}

static inline void set_freeobj(struct zspage *zspage, unsigned int obj)
{
	zspage->freeobj = obj;
}

static void get_zspage_mapping(struct zspage *zspage,
				unsigned int *class_idx,
				enum fullness_group *fullness)
{
	BUG_ON(zspage->magic != ZSPAGE_MAGIC);

	*fullness = zspage->fullness;
	*class_idx = zspage->class;
}

static void set_zspage_mapping(struct zspage *zspage,
				unsigned int class_idx,
				enum fullness_group fullness)
{
	zspage->class = class_idx;
	zspage->fullness = fullness;
}

/*
 * zsmalloc divides the pool into various size classes where each
 * class maintains a list of zspages where each zspage is divided
 * into equal sized chunks. Each allocation falls into one of these
 * classes depending on its size. This function returns the index of the
 * size class which has a chunk size big enough to hold the given size.
 */
static int get_size_class_index(int size)
{
	int idx = 0;

	if (likely(size > ZS_MIN_ALLOC_SIZE))
		idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
				ZS_SIZE_CLASS_DELTA);

	return min(zs_size_classes - 1, idx);
}

static inline void zs_stat_inc(struct size_class *class,
				enum zs_stat_type type, unsigned long cnt)
{
	class->stats.objs[type] += cnt;
}

static inline void zs_stat_dec(struct size_class *class,
				enum zs_stat_type type, unsigned long cnt)
{
	class->stats.objs[type] -= cnt;
}

static inline unsigned long zs_stat_get(struct size_class *class,
				enum zs_stat_type type)
{
	return class->stats.objs[type];
}

#ifdef CONFIG_ZSMALLOC_STAT

static void __init zs_stat_init(void)
{
	if (!debugfs_initialized()) {
		pr_warn("debugfs not available, stat dir not created\n");
		return;
	}

	zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
	if (!zs_stat_root)
		pr_warn("debugfs 'zsmalloc' stat dir creation failed\n");
}

static void __exit zs_stat_exit(void)
{
	debugfs_remove_recursive(zs_stat_root);
}

static unsigned long zs_can_compact(struct size_class *class);

static int zs_stats_size_show(struct seq_file *s, void *v)
{
	int i;
	struct zs_pool *pool = s->private;
	struct size_class *class;
	int objs_per_zspage;
	unsigned long class_almost_full, class_almost_empty;
	unsigned long obj_allocated, obj_used, pages_used, freeable;
	unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
	unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
	unsigned long total_freeable = 0;

	seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s %8s\n",
			"class", "size", "almost_full", "almost_empty",
			"obj_allocated", "obj_used", "pages_used",
			"pages_per_zspage", "freeable");

	for (i = 0; i < zs_size_classes; i++) {
		class = pool->size_class[i];

		if (class->index != i)
			continue;

		spin_lock(&class->lock);
		class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
		class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
		obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
		obj_used = zs_stat_get(class, OBJ_USED);
		freeable = zs_can_compact(class);
		spin_unlock(&class->lock);

		objs_per_zspage = class->objs_per_zspage;
		pages_used = obj_allocated / objs_per_zspage *
				class->pages_per_zspage;

		seq_printf(s, " %5u %5u %11lu %12lu %13lu"
				" %10lu %10lu %16d %8lu\n",
			i, class->size, class_almost_full, class_almost_empty,
			obj_allocated, obj_used, pages_used,
			class->pages_per_zspage, freeable);

		total_class_almost_full += class_almost_full;
		total_class_almost_empty += class_almost_empty;
		total_objs += obj_allocated;
		total_used_objs += obj_used;
		total_pages += pages_used;
		total_freeable += freeable;
	}

	seq_puts(s, "\n");
	seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu %16s %8lu\n",
			"Total", "", total_class_almost_full,
			total_class_almost_empty, total_objs,
			total_used_objs, total_pages, "", total_freeable);

	return 0;
}

static int zs_stats_size_open(struct inode *inode, struct file *file)
{
	return single_open(file, zs_stats_size_show, inode->i_private);
}

static const struct file_operations zs_stat_size_ops = {
	.open           = zs_stats_size_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = single_release,
};

static void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
	struct dentry *entry;

	if (!zs_stat_root) {
		pr_warn("no root stat dir, not creating <%s> stat dir\n", name);
		return;
	}

	entry = debugfs_create_dir(name, zs_stat_root);
	if (!entry) {
		pr_warn("debugfs dir <%s> creation failed\n", name);
		return;
	}
	pool->stat_dentry = entry;

	entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
			pool->stat_dentry, pool, &zs_stat_size_ops);
	if (!entry) {
		pr_warn("%s: debugfs file entry <%s> creation failed\n",
				name, "classes");
		debugfs_remove_recursive(pool->stat_dentry);
		pool->stat_dentry = NULL;
	}
}

static void zs_pool_stat_destroy(struct zs_pool *pool)
{
	debugfs_remove_recursive(pool->stat_dentry);
}

#else /* CONFIG_ZSMALLOC_STAT */
static void __init zs_stat_init(void)
{
}

static void __exit zs_stat_exit(void)
{
}

static inline void zs_pool_stat_create(struct zs_pool *pool, const char *name)
{
}

static inline void zs_pool_stat_destroy(struct zs_pool *pool)
{
}
#endif

/*
 * For each size class, zspages are divided into different groups
 * depending on how "full" they are. This was done so that we could
 * easily find empty or nearly empty zspages when we try to shrink
 * the pool (not yet implemented). This function returns the fullness
 * status of the given zspage.
 */
static enum fullness_group get_fullness_group(struct size_class *class,
						struct zspage *zspage)
{
	int inuse, objs_per_zspage;
	enum fullness_group fg;

	inuse = get_zspage_inuse(zspage);
	objs_per_zspage = class->objs_per_zspage;

	if (inuse == 0)
		fg = ZS_EMPTY;
	else if (inuse == objs_per_zspage)
		fg = ZS_FULL;
	else if (inuse <= 3 * objs_per_zspage / fullness_threshold_frac)
		fg = ZS_ALMOST_EMPTY;
	else
		fg = ZS_ALMOST_FULL;

	return fg;
}

/*
 * Each size class maintains various freelists and zspages are assigned
 * to one of these freelists based on the number of live objects they
 * have. This function inserts the given zspage into the freelist
 * identified by <class, fullness_group>.
 */
static void insert_zspage(struct size_class *class,
				struct zspage *zspage,
				enum fullness_group fullness)
{
	struct zspage *head;

	zs_stat_inc(class, fullness, 1);
	head = list_first_entry_or_null(&class->fullness_list[fullness],
					struct zspage, list);
	/*
	 * We want to see more ZS_FULL pages and fewer almost empty/full ones.
	 * Put pages with higher ->inuse first.
	 */
	if (head) {
		if (get_zspage_inuse(zspage) < get_zspage_inuse(head)) {
			list_add(&zspage->list, &head->list);
			return;
		}
	}
	list_add(&zspage->list, &class->fullness_list[fullness]);
}

/*
 * This function removes the given zspage from the freelist identified
 * by <class, fullness_group>.
 */
static void remove_zspage(struct size_class *class,
				struct zspage *zspage,
				enum fullness_group fullness)
{
	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
	VM_BUG_ON(is_zspage_isolated(zspage));

	list_del_init(&zspage->list);
	zs_stat_dec(class, fullness, 1);
}

/*
 * Each size class maintains zspages in different fullness groups depending
 * on the number of live objects they contain. When allocating or freeing
 * objects, the fullness status of the page can change, say, from ALMOST_FULL
 * to ALMOST_EMPTY when freeing an object. This function checks if such
 * a status change has occurred for the given page and accordingly moves the
 * page from the freelist of the old fullness group to that of the new
 * fullness group.
 */
static enum fullness_group fix_fullness_group(struct size_class *class,
						struct zspage *zspage)
{
	int class_idx;
	enum fullness_group currfg, newfg;

	get_zspage_mapping(zspage, &class_idx, &currfg);
	newfg = get_fullness_group(class, zspage);
	if (newfg == currfg)
		goto out;

	if (!is_zspage_isolated(zspage)) {
		remove_zspage(class, zspage, currfg);
		insert_zspage(class, zspage, newfg);
	}

	set_zspage_mapping(zspage, class_idx, newfg);

out:
	return newfg;
}

/*
 * We have to decide on how many pages to link together
 * to form a zspage for each size class. This is important
 * to reduce wastage due to unusable space left at end of
 * each zspage which is given as:
 *     wastage = Zp % class_size
 *     usage = Zp - wastage
 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
 *
 * For example, for size class of 3/8 * PAGE_SIZE, we should
 * link together 3 PAGE_SIZE sized pages to form a zspage
 * since then we can perfectly fit in 8 such objects.
 */
static int get_pages_per_zspage(int class_size)
{
	int i, max_usedpc = 0;
	/* zspage order which gives maximum used size per KB */
	int max_usedpc_order = 1;

	for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
		int zspage_size;
		int waste, usedpc;

		zspage_size = i * PAGE_SIZE;
		waste = zspage_size % class_size;
		usedpc = (zspage_size - waste) * 100 / zspage_size;

		if (usedpc > max_usedpc) {
			max_usedpc = usedpc;
			max_usedpc_order = i;
		}
	}

	return max_usedpc_order;
}
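
/*
 * Worked example (assuming 4K pages): for class_size == 1536, i.e. 3/8
 * of PAGE_SIZE, the loop above computes
 *	i == 1: waste = 4096 % 1536 = 1024, usedpc = 75
 *	i == 2: waste = 8192 % 1536 =  512, usedpc = 93
 *	i == 3: waste = 0,                  usedpc = 100
 *	i == 4: waste = 1024,               usedpc = 93
 * so 3 pages are linked per zspage, matching the comment above.
 */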

static struct zspage *get_zspage(struct page *page)
{
	struct zspage *zspage = (struct zspage *)page->private;

	BUG_ON(zspage->magic != ZSPAGE_MAGIC);
	return zspage;
}

static struct page *get_next_page(struct page *page)
{
	if (unlikely(PageHugeObject(page)))
		return NULL;

	return page->freelist;
}

/**
 * obj_to_location - get (<page>, <obj_idx>) from encoded object value
 * @obj: encoded object value
 * @page: page object resides in zspage
 * @obj_idx: object index
 */
static void obj_to_location(unsigned long obj, struct page **page,
				unsigned int *obj_idx)
{
	obj >>= OBJ_TAG_BITS;
	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
	*obj_idx = (obj & OBJ_INDEX_MASK);
}

/**
 * location_to_obj - get obj value encoded from (<page>, <obj_idx>)
 * @page: page object resides in zspage
 * @obj_idx: object index
 */
static unsigned long location_to_obj(struct page *page, unsigned int obj_idx)
{
	unsigned long obj;

	obj = page_to_pfn(page) << OBJ_INDEX_BITS;
	obj |= obj_idx & OBJ_INDEX_MASK;
	obj <<= OBJ_TAG_BITS;

	return obj;
}

static unsigned long handle_to_obj(unsigned long handle)
{
	return *(unsigned long *)handle;
}

static unsigned long obj_to_head(struct page *page, void *obj)
{
	if (unlikely(PageHugeObject(page))) {
		VM_BUG_ON_PAGE(!is_first_page(page), page);
		return page->index;
	} else
		return *(unsigned long *)obj;
}

static inline int testpin_tag(unsigned long handle)
{
	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
}

static inline int trypin_tag(unsigned long handle)
{
	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
}

static void pin_tag(unsigned long handle)
{
	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
}

static void unpin_tag(unsigned long handle)
{
	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
}

static void reset_page(struct page *page)
{
	__ClearPageMovable(page);
	clear_bit(PG_private, &page->flags);
	clear_bit(PG_private_2, &page->flags);
	set_page_private(page, 0);
	page_mapcount_reset(page);
	ClearPageHugeObject(page);
	page->freelist = NULL;
}

/*
 * To prevent zspage destroy during migration, zspage freeing should
 * hold locks of all pages in the zspage.
 */
void lock_zspage(struct zspage *zspage)
{
	struct page *page = get_first_page(zspage);

	do {
		lock_page(page);
	} while ((page = get_next_page(page)) != NULL);
}

int trylock_zspage(struct zspage *zspage)
{
	struct page *cursor, *fail;

	for (cursor = get_first_page(zspage); cursor != NULL; cursor =
					get_next_page(cursor)) {
		if (!trylock_page(cursor)) {
			fail = cursor;
			goto unlock;
		}
	}

	return 1;
unlock:
	for (cursor = get_first_page(zspage); cursor != fail; cursor =
					get_next_page(cursor))
		unlock_page(cursor);

	return 0;
}

static void __free_zspage(struct zs_pool *pool, struct size_class *class,
				struct zspage *zspage)
{
	struct page *page, *next;
	enum fullness_group fg;
	unsigned int class_idx;

	get_zspage_mapping(zspage, &class_idx, &fg);

	assert_spin_locked(&class->lock);

	VM_BUG_ON(get_zspage_inuse(zspage));
	VM_BUG_ON(fg != ZS_EMPTY);

	next = page = get_first_page(zspage);
	do {
		VM_BUG_ON_PAGE(!PageLocked(page), page);