/*
 * mm/kmemleak.c
 *
 * Copyright (C) 2008 ARM Limited
 * Written by Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *
 * For more information on the algorithm and kmemleak usage, please see
 * Documentation/dev-tools/kmemleak.rst.
 *
 * Notes on locking
 * ----------------
 *
 * The following locks and mutexes are used by kmemleak:
 *
 * - kmemleak_lock (rwlock): protects the object_list modifications and
 *   accesses to the object_tree_root. The object_list is the main list
 *   holding the metadata (struct kmemleak_object) for the allocated memory
 *   blocks. The object_tree_root is a red-black tree used to look up
 *   metadata based on a pointer to the corresponding memory block.  The
 *   kmemleak_object structures are added to the object_list and
 *   object_tree_root in the create_object() function called from the
 *   kmemleak_alloc() callback and removed in delete_object() called from the
 *   kmemleak_free() callback
 * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to
 *   the metadata (e.g. count) are protected by this lock. Note that some
 *   members of this structure may be protected by other means (atomic or
 *   kmemleak_lock). This lock is also held when scanning the corresponding
 *   memory block to avoid the kernel freeing it via the kmemleak_free()
 *   callback. This is less heavyweight than holding a global lock like
 *   kmemleak_lock during scanning
 * - scan_mutex (mutex): ensures that only one thread may scan the memory for
 *   unreferenced objects at a time. The gray_list contains the objects which
 *   are already referenced or marked as false positives and need to be
 *   scanned. This list is only modified during a scanning episode when the
 *   scan_mutex is held. At the end of a scan, the gray_list is always empty.
 *   Note that the kmemleak_object.use_count is incremented when an object is
 *   added to the gray_list and therefore cannot be freed. This mutex also
 *   prevents multiple users of the "kmemleak" debugfs file together with
 *   modifications to the memory scanning parameters including the scan_thread
 *   pointer
 *
 * Locks and mutexes are acquired/nested in the following order:
 *
 *   scan_mutex [-> object->lock] -> kmemleak_lock -> other_object->lock (SINGLE_DEPTH_NESTING)
 *
 * No kmemleak_lock and object->lock nesting is allowed outside scan_mutex
 * regions.
 *
 * The kmemleak_object structures have a use_count incremented or decremented
 * using the get_object()/put_object() functions. When the use_count becomes
 * 0, this count can no longer be incremented and put_object() schedules the
 * kmemleak_object freeing via an RCU callback. All calls to the get_object()
 * function must be protected by rcu_read_lock() to avoid accessing a freed
 * structure.
 */
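
/*
 * Illustrative sketch (added for this edit, not part of the original file):
 * the reference-counting pattern described above, as implemented by
 * find_and_get_object() later in this file. Error handling is abbreviated.
 *
 *	rcu_read_lock();
 *	read_lock_irqsave(&kmemleak_lock, flags);
 *	object = lookup_object(ptr, alias);
 *	read_unlock_irqrestore(&kmemleak_lock, flags);
 *	if (object && !get_object(object))	// use_count already dropped to 0
 *		object = NULL;
 *	rcu_read_unlock();
 */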

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/jiffies.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/kthread.h>
#include <linux/rbtree.h>
#include <linux/fs.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/stacktrace.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mmzone.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/err.h>
#include <linux/uaccess.h>
#include <linux/string.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
#include <linux/workqueue.h>
#include <linux/crc32.h>

#include <asm/sections.h>
#include <asm/processor.h>
#include <linux/atomic.h>

#include <linux/kasan.h>
#include <linux/kmemcheck.h>
#include <linux/kmemleak.h>
#include <linux/memory_hotplug.h>

/*
 * Kmemleak configuration and common defines.
 */
#define MAX_TRACE		16	/* stack trace length */
#define MSECS_MIN_AGE		5000	/* minimum object age for reporting */
#define SECS_FIRST_SCAN		60	/* delay before the first scan */
#define SECS_SCAN_WAIT		600	/* subsequent auto scanning delay */
#define MAX_SCAN_SIZE		4096	/* maximum size of a scanned block */

#define BYTES_PER_POINTER	sizeof(void *)

/* GFP bitmask for kmemleak internal allocations */
#define gfp_kmemleak_mask(gfp)	(((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
				 __GFP_NORETRY | __GFP_NOMEMALLOC | \
				 __GFP_NOWARN)

/* scanning area inside a memory block */
struct kmemleak_scan_area {
	struct hlist_node node;
	unsigned long start;
	size_t size;
};

#define KMEMLEAK_GREY	0
#define KMEMLEAK_BLACK	-1

/*
 * Structure holding the metadata for each allocated memory block.
 * Modifications to such objects should be made while holding the
 * object->lock. Insertions or deletions from object_list, gray_list or
 * rb_node are already protected by the corresponding locks or mutex (see
 * the notes on locking above). These objects are reference-counted
 * (use_count) and freed using the RCU mechanism.
 */
struct kmemleak_object {
	spinlock_t lock;
	unsigned long flags;		/* object status flags */
	struct list_head object_list;
	struct list_head gray_list;
	struct rb_node rb_node;
	struct rcu_head rcu;		/* object_list lockless traversal */
	/* object usage count; object freed when use_count == 0 */
	atomic_t use_count;
	unsigned long pointer;
	size_t size;
	/* minimum number of pointers found before it is considered a leak */
	int min_count;
	/* the total number of pointers found pointing to this object */
	int count;
	/* checksum for detecting modified objects */
	u32 checksum;
	/* memory ranges to be scanned inside an object (empty for all) */
	struct hlist_head area_list;
	unsigned long trace[MAX_TRACE];
	unsigned int trace_len;
	unsigned long jiffies;		/* creation timestamp */
	pid_t pid;			/* pid of the current task */
	char comm[TASK_COMM_LEN];	/* executable name */
};

/* flag representing the memory block allocation status */
#define OBJECT_ALLOCATED	(1 << 0)
/* flag set after the first reporting of an unreferenced object */
#define OBJECT_REPORTED		(1 << 1)
/* flag set to not scan the object */
#define OBJECT_NO_SCAN		(1 << 2)

/* number of bytes to print per line; must be 16 or 32 */
#define HEX_ROW_SIZE		16
/* number of bytes to print at a time (1, 2, 4, 8) */
#define HEX_GROUP_SIZE		1
/* include ASCII after the hex output */
#define HEX_ASCII		1
/* max number of lines to be printed */
#define HEX_MAX_LINES		2

/* the list of all allocated objects */
static LIST_HEAD(object_list);
/* the list of gray-colored objects (see color_gray comment below) */
static LIST_HEAD(gray_list);
/* search tree for object boundaries */
static struct rb_root object_tree_root = RB_ROOT;
/* rw_lock protecting the access to object_list and object_tree_root */
static DEFINE_RWLOCK(kmemleak_lock);

/* allocation caches for kmemleak internal data */
static struct kmem_cache *object_cache;
static struct kmem_cache *scan_area_cache;

/* set if tracing memory operations is enabled */
static int kmemleak_enabled;
/* same as above but only for the kmemleak_free() callback */
static int kmemleak_free_enabled;
/* set in the late_initcall if there were no errors */
static int kmemleak_initialized;
/* enables or disables early logging of the memory operations */
static int kmemleak_early_log = 1;
/* set if a kmemleak warning was issued */
static int kmemleak_warning;
/* set if a fatal kmemleak error has occurred */
static int kmemleak_error;

/* minimum and maximum address that may be valid pointers */
static unsigned long min_addr = ULONG_MAX;
static unsigned long max_addr;

static struct task_struct *scan_thread;
/* used to avoid reporting of recently allocated objects */
static unsigned long jiffies_min_age;
static unsigned long jiffies_last_scan;
/* delay between automatic memory scannings */
static signed long jiffies_scan_wait;
/* enables or disables the task stacks scanning */
static int kmemleak_stack_scan = 1;
/* protects the memory scanning, parameters and debug/kmemleak file access */
static DEFINE_MUTEX(scan_mutex);
/* setting kmemleak=on will set this var, skipping the disable */
static int kmemleak_skip_disable;
/* If there are leaks that can be reported */
static bool kmemleak_found_leaks;

/*
 * Early object allocation/freeing logging. Kmemleak is initialized after the
 * kernel allocator. However, both the kernel allocator and kmemleak may
 * allocate memory blocks which need to be tracked. Kmemleak defines an
 * arbitrary buffer to hold the allocation/freeing information before it is
 * fully initialized.
 */

/* kmemleak operation type for early logging */
enum {
	KMEMLEAK_ALLOC,
	KMEMLEAK_ALLOC_PERCPU,
	KMEMLEAK_FREE,
	KMEMLEAK_FREE_PART,
	KMEMLEAK_FREE_PERCPU,
	KMEMLEAK_NOT_LEAK,
	KMEMLEAK_IGNORE,
	KMEMLEAK_SCAN_AREA,
	KMEMLEAK_NO_SCAN
};

/*
 * Structure holding the information passed to kmemleak callbacks during the
 * early logging.
 */
struct early_log {
	int op_type;			/* kmemleak operation type */
	const void *ptr;		/* allocated/freed memory block */
	size_t size;			/* memory block size */
	int min_count;			/* minimum reference count */
	unsigned long trace[MAX_TRACE];	/* stack trace */
	unsigned int trace_len;		/* stack trace length */
};
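
/*
 * Hedged illustration (not in the original source): the intended early-log
 * flow. Before the slab caches below exist, a callback such as
 * kmemleak_alloc() cannot allocate metadata, so it only records its
 * arguments:
 *
 *	kmemleak_alloc(ptr, size, min_count, gfp)
 *		-> log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
 *
 * Later, kmemleak_init() (outside this excerpt) replays the buffer, e.g.
 * KMEMLEAK_ALLOC entries via early_alloc() defined further down.
 */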

/* early logging buffer and current position */
static struct early_log
	early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata;
static int crt_early_log __initdata;

static void kmemleak_disable(void);

/*
 * Print a warning and dump the stack trace.
 */
#define kmemleak_warn(x...)	do {		\
	pr_warn(x);				\
	dump_stack();				\
	kmemleak_warning = 1;			\
} while (0)

/*
 * Macro invoked when a serious kmemleak condition has occurred and cannot be
 * recovered from. Kmemleak will be disabled and further allocation/freeing
 * tracing is no longer available.
 */
#define kmemleak_stop(x...)	do {	\
	kmemleak_warn(x);		\
	kmemleak_disable();		\
} while (0)

/*
 * Printing of the object's hex dump to the seq file. The number of lines to be
 * printed is limited to HEX_MAX_LINES to prevent seq file spamming. The
 * actual number of printed bytes depends on HEX_ROW_SIZE. It must be called
 * with the object->lock held.
 */
static void hex_dump_object(struct seq_file *seq,
			    struct kmemleak_object *object)
{
	const u8 *ptr = (const u8 *)object->pointer;
	size_t len;

	/* limit the number of lines to HEX_MAX_LINES */
	len = min_t(size_t, object->size, HEX_MAX_LINES * HEX_ROW_SIZE);

	seq_printf(seq, "  hex dump (first %zu bytes):\n", len);
	kasan_disable_current();
	seq_hex_dump(seq, "    ", DUMP_PREFIX_NONE, HEX_ROW_SIZE,
		     HEX_GROUP_SIZE, ptr, len, HEX_ASCII);
	kasan_enable_current();
}

/*
 * Object colors, encoded with count and min_count:
 * - white - orphan object, not enough references to it (count < min_count)
 * - gray  - not orphan, not marked as false positive (min_count == 0) or
 *		sufficient references to it (count >= min_count)
 * - black - ignore, it doesn't contain references (e.g. text section)
 *		(min_count == -1). No function defined for this color.
 * Newly created objects don't have any color assigned (object->count == -1)
 * before the next memory scan when they become white.
 */
static bool color_white(const struct kmemleak_object *object)
{
	return object->count != KMEMLEAK_BLACK &&
		object->count < object->min_count;
}

static bool color_gray(const struct kmemleak_object *object)
{
	return object->min_count != KMEMLEAK_BLACK &&
		object->count >= object->min_count;
}

/*
 * Objects are considered unreferenced only if their color is white, they have
 * not been deleted and have a minimum age to avoid false positives caused by
 * pointers temporarily stored in CPU registers.
 */
static bool unreferenced_object(struct kmemleak_object *object)
{
	return (color_white(object) && object->flags & OBJECT_ALLOCATED) &&
		time_before_eq(object->jiffies + jiffies_min_age,
			       jiffies_last_scan);
}
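
/*
 * Worked example (added for illustration): jiffies_min_age is derived from
 * MSECS_MIN_AGE during initialisation (not shown in this excerpt), so with
 * the default of 5000ms an object created at jiffies J is only considered
 * unreferenced once a scan finishes with
 * jiffies_last_scan >= J + msecs_to_jiffies(5000). This gives pointers that
 * only lived in CPU registers or on stacks time to reach scannable memory.
 */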

/*
 * Printing of the unreferenced objects' information to the seq file. The
 * print_unreferenced function must be called with the object->lock held.
 */
static void print_unreferenced(struct seq_file *seq,
			       struct kmemleak_object *object)
{
	int i;
	unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies);

	seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
		   object->pointer, object->size);
	seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n",
		   object->comm, object->pid, object->jiffies,
		   msecs_age / 1000, msecs_age % 1000);
	hex_dump_object(seq, object);
	seq_printf(seq, "  backtrace:\n");

	for (i = 0; i < object->trace_len; i++) {
		void *ptr = (void *)object->trace[i];
		seq_printf(seq, "    [<%p>] %pS\n", ptr, ptr);
	}
}

/*
 * Print the kmemleak_object information. This function is used mainly for
 * debugging special cases of kmemleak operations. It must be called with
 * the object->lock held.
 */
static void dump_object_info(struct kmemleak_object *object)
{
	struct stack_trace trace;

	trace.nr_entries = object->trace_len;
	trace.entries = object->trace;

	pr_notice("Object 0x%08lx (size %zu):\n",
		  object->pointer, object->size);
	pr_notice("  comm \"%s\", pid %d, jiffies %lu\n",
		  object->comm, object->pid, object->jiffies);
	pr_notice("  min_count = %d\n", object->min_count);
	pr_notice("  count = %d\n", object->count);
	pr_notice("  flags = 0x%lx\n", object->flags);
	pr_notice("  checksum = %u\n", object->checksum);
	pr_notice("  backtrace:\n");
	print_stack_trace(&trace, 4);
}

/*
 * Look-up a memory block metadata (kmemleak_object) in the object search
 * tree based on a pointer value. If alias is 0, only values pointing to the
 * beginning of the memory block are allowed. The kmemleak_lock must be held
 * when calling this function.
 */
static struct kmemleak_object *lookup_object(unsigned long ptr, int alias)
{
	struct rb_node *rb = object_tree_root.rb_node;

	while (rb) {
		struct kmemleak_object *object =
			rb_entry(rb, struct kmemleak_object, rb_node);
		if (ptr < object->pointer)
			rb = object->rb_node.rb_left;
		else if (object->pointer + object->size <= ptr)
			rb = object->rb_node.rb_right;
		else if (object->pointer == ptr || alias)
			return object;
		else {
			kmemleak_warn("Found object by alias at 0x%08lx\n",
				      ptr);
			dump_object_info(object);
			break;
		}
	}
	return NULL;
}

/*
 * Increment the object use_count. Return 1 if successful or 0 otherwise. Note
 * that once an object's use_count reached 0, the RCU freeing was already
 * registered and the object should no longer be used. This function must be
 * called under the protection of rcu_read_lock().
 */
static int get_object(struct kmemleak_object *object)
{
	return atomic_inc_not_zero(&object->use_count);
}

/*
 * RCU callback to free a kmemleak_object.
 */
static void free_object_rcu(struct rcu_head *rcu)
{
	struct hlist_node *tmp;
	struct kmemleak_scan_area *area;
	struct kmemleak_object *object =
		container_of(rcu, struct kmemleak_object, rcu);

	/*
	 * Once use_count is 0 (guaranteed by put_object), there is no other
	 * code accessing this object, hence no need for locking.
	 */
	hlist_for_each_entry_safe(area, tmp, &object->area_list, node) {
		hlist_del(&area->node);
		kmem_cache_free(scan_area_cache, area);
	}
	kmem_cache_free(object_cache, object);
}

/*
 * Decrement the object use_count. Once the count is 0, free the object using
 * an RCU callback. Since put_object() may be called via the kmemleak_free() ->
 * delete_object() path, the delayed RCU freeing ensures that there is no
 * recursive call to the kernel allocator. Lock-less RCU object_list traversal
 * is also possible.
 */
static void put_object(struct kmemleak_object *object)
{
	if (!atomic_dec_and_test(&object->use_count))
		return;

	/* should only get here after delete_object was called */
	WARN_ON(object->flags & OBJECT_ALLOCATED);

	call_rcu(&object->rcu, free_object_rcu);
}

/*
 * Look up an object in the object search tree and increase its use_count.
 */
static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
{
	unsigned long flags;
	struct kmemleak_object *object;

	rcu_read_lock();
	read_lock_irqsave(&kmemleak_lock, flags);
	object = lookup_object(ptr, alias);
	read_unlock_irqrestore(&kmemleak_lock, flags);

	/* check whether the object is still available */
	if (object && !get_object(object))
		object = NULL;
	rcu_read_unlock();

	return object;
}

/*
 * Look up an object in the object search tree and remove it from both
 * object_tree_root and object_list. The returned object's use_count should be
 * at least 1, as initially set by create_object().
 */
static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int alias)
{
	unsigned long flags;
	struct kmemleak_object *object;

	write_lock_irqsave(&kmemleak_lock, flags);
	object = lookup_object(ptr, alias);
	if (object) {
		rb_erase(&object->rb_node, &object_tree_root);
		list_del_rcu(&object->object_list);
	}
	write_unlock_irqrestore(&kmemleak_lock, flags);

	return object;
}

/*
 * Save stack trace to the given array of MAX_TRACE size.
 */
static int __save_stack_trace(unsigned long *trace)
{
	struct stack_trace stack_trace;

	stack_trace.max_entries = MAX_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = trace;
	stack_trace.skip = 2;
	save_stack_trace(&stack_trace);

	return stack_trace.nr_entries;
}

/*
 * Create the metadata (struct kmemleak_object) corresponding to an allocated
 * memory block and add it to the object_list and object_tree_root.
 */
static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
					     int min_count, gfp_t gfp)
{
	unsigned long flags;
	struct kmemleak_object *object, *parent;
	struct rb_node **link, *rb_parent;

	object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp));
	if (!object) {
		pr_warn("Cannot allocate a kmemleak_object structure\n");
		kmemleak_disable();
		return NULL;
	}

	INIT_LIST_HEAD(&object->object_list);
	INIT_LIST_HEAD(&object->gray_list);
	INIT_HLIST_HEAD(&object->area_list);
	spin_lock_init(&object->lock);
	atomic_set(&object->use_count, 1);
	object->flags = OBJECT_ALLOCATED;
	object->pointer = ptr;
	object->size = size;
	object->min_count = min_count;
	object->count = 0;			/* white color initially */
	object->jiffies = jiffies;
	object->checksum = 0;

	/* task information */
	if (in_irq()) {
		object->pid = 0;
		strncpy(object->comm, "hardirq", sizeof(object->comm));
	} else if (in_softirq()) {
		object->pid = 0;
		strncpy(object->comm, "softirq", sizeof(object->comm));
	} else {
		object->pid = current->pid;
		/*
		 * There is a small chance of a race with set_task_comm(),
		 * however using get_task_comm() here may cause locking
		 * dependency issues with current->alloc_lock. In the worst
		 * case, the command line is not correct.
		 */
		strncpy(object->comm, current->comm, sizeof(object->comm));
	}

	/* kernel backtrace */
	object->trace_len = __save_stack_trace(object->trace);

	write_lock_irqsave(&kmemleak_lock, flags);

	min_addr = min(min_addr, ptr);
	max_addr = max(max_addr, ptr + size);
	link = &object_tree_root.rb_node;
	rb_parent = NULL;
	while (*link) {
		rb_parent = *link;
		parent = rb_entry(rb_parent, struct kmemleak_object, rb_node);
		if (ptr + size <= parent->pointer)
			link = &parent->rb_node.rb_left;
		else if (parent->pointer + parent->size <= ptr)
			link = &parent->rb_node.rb_right;
		else {
			kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n",
				      ptr);
			/*
			 * No need for parent->lock here since "parent" cannot
			 * be freed while the kmemleak_lock is held.
			 */
			dump_object_info(parent);
			kmem_cache_free(object_cache, object);
			object = NULL;
			goto out;
		}
	}
	rb_link_node(&object->rb_node, rb_parent, link);
	rb_insert_color(&object->rb_node, &object_tree_root);

	list_add_tail_rcu(&object->object_list, &object_list);
out:
	write_unlock_irqrestore(&kmemleak_lock, flags);
	return object;
}

/*
 * Mark the object as not allocated and schedule RCU freeing via put_object().
 */
static void __delete_object(struct kmemleak_object *object)
{
	unsigned long flags;

	WARN_ON(!(object->flags & OBJECT_ALLOCATED));
	WARN_ON(atomic_read(&object->use_count) < 1);

	/*
	 * Locking here also ensures that the corresponding memory block
	 * cannot be freed when it is being scanned.
	 */
	spin_lock_irqsave(&object->lock, flags);
	object->flags &= ~OBJECT_ALLOCATED;
	spin_unlock_irqrestore(&object->lock, flags);
	put_object(object);
}

/*
 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
 * delete it.
 */
static void delete_object_full(unsigned long ptr)
{
	struct kmemleak_object *object;

	object = find_and_remove_object(ptr, 0);
	if (!object) {
#ifdef DEBUG
		kmemleak_warn("Freeing unknown object at 0x%08lx\n",
			      ptr);
#endif
		return;
	}
	__delete_object(object);
}

/*
 * Look up the metadata (struct kmemleak_object) corresponding to ptr and
 * delete it. If the memory block is partially freed, the function may create
 * additional metadata for the remaining parts of the block.
 */
static void delete_object_part(unsigned long ptr, size_t size)
{
	struct kmemleak_object *object;
	unsigned long start, end;

	object = find_and_remove_object(ptr, 1);
	if (!object) {
#ifdef DEBUG
		kmemleak_warn("Partially freeing unknown object at 0x%08lx (size %zu)\n",
			      ptr, size);
#endif
		return;
	}

	/*
	 * Create one or two objects that may result from the memory block
	 * split. Note that partial freeing is only done by free_bootmem() and
	 * this happens before kmemleak_init() is called. The path below is
	 * only executed during early log recording in kmemleak_init(), so
	 * GFP_KERNEL is enough.
	 */
	start = object->pointer;
	end = object->pointer + object->size;
	if (ptr > start)
		create_object(start, ptr - start, object->min_count,
			      GFP_KERNEL);
	if (ptr + size < end)
		create_object(ptr + size, end - ptr - size, object->min_count,
			      GFP_KERNEL);

	__delete_object(object);
}

static void __paint_it(struct kmemleak_object *object, int color)
{
	object->min_count = color;
	if (color == KMEMLEAK_BLACK)
		object->flags |= OBJECT_NO_SCAN;
}

static void paint_it(struct kmemleak_object *object, int color)
{
	unsigned long flags;

	spin_lock_irqsave(&object->lock, flags);
	__paint_it(object, color);
	spin_unlock_irqrestore(&object->lock, flags);
}

static void paint_ptr(unsigned long ptr, int color)
{
	struct kmemleak_object *object;

	object = find_and_get_object(ptr, 0);
	if (!object) {
		kmemleak_warn("Trying to color unknown object at 0x%08lx as %s\n",
			      ptr,
			      (color == KMEMLEAK_GREY) ? "Grey" :
			      (color == KMEMLEAK_BLACK) ? "Black" : "Unknown");
		return;
	}
	paint_it(object, color);
	put_object(object);
}

/*
 * Mark an object permanently as gray-colored so that it can no longer be
 * reported as a leak. This is used in general to mark a false positive.
 */
static void make_gray_object(unsigned long ptr)
{
	paint_ptr(ptr, KMEMLEAK_GREY);
}

/*
 * Mark the object as black-colored so that it is ignored from scans and
 * reporting.
 */
static void make_black_object(unsigned long ptr)
{
	paint_ptr(ptr, KMEMLEAK_BLACK);
}

/*
 * Add a scanning area to the object. If at least one such area is added,
 * kmemleak will only scan these ranges rather than the whole memory block.
 */
static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp)
{
	unsigned long flags;
	struct kmemleak_object *object;
	struct kmemleak_scan_area *area;

	object = find_and_get_object(ptr, 1);
	if (!object) {
		kmemleak_warn("Adding scan area to unknown object at 0x%08lx\n",
			      ptr);
		return;
	}

	area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp));
	if (!area) {
		pr_warn("Cannot allocate a scan area\n");
		goto out;
	}

	spin_lock_irqsave(&object->lock, flags);
	if (size == SIZE_MAX) {
		size = object->pointer + object->size - ptr;
	} else if (ptr + size > object->pointer + object->size) {
		kmemleak_warn("Scan area larger than object 0x%08lx\n", ptr);
		dump_object_info(object);
		kmem_cache_free(scan_area_cache, area);
		goto out_unlock;
	}

	INIT_HLIST_NODE(&area->node);
	area->start = ptr;
	area->size = size;

	hlist_add_head(&area->node, &object->area_list);
out_unlock:
	spin_unlock_irqrestore(&object->lock, flags);
out:
	put_object(object);
}
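
/*
 * Illustrative note (not part of the original file): add_scan_area() is
 * normally reached via the kmemleak_scan_area() callback (defined later in
 * the full file) when only part of an object contains pointers worth
 * following. A hypothetical caller might do:
 *
 *	obj = kmalloc(sizeof(*obj), GFP_KERNEL);
 *	kmemleak_scan_area(&obj->ptr_table, sizeof(obj->ptr_table), GFP_KERNEL);
 *
 * Passing SIZE_MAX as the size means "scan from ptr to the end of the
 * object", as handled above.
 */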

/*
 * Set the OBJECT_NO_SCAN flag for the object corresponding to the given
 * pointer. Such an object will not be scanned by kmemleak but references to it
 * are searched.
 */
static void object_no_scan(unsigned long ptr)
{
	unsigned long flags;
	struct kmemleak_object *object;

	object = find_and_get_object(ptr, 0);
	if (!object) {
		kmemleak_warn("Not scanning unknown object at 0x%08lx\n", ptr);
		return;
	}

	spin_lock_irqsave(&object->lock, flags);
	object->flags |= OBJECT_NO_SCAN;
	spin_unlock_irqrestore(&object->lock, flags);
	put_object(object);
}

/*
 * Log an early kmemleak_* call to the early_log buffer. These calls will be
 * processed later once kmemleak is fully initialized.
 */
static void __init log_early(int op_type, const void *ptr, size_t size,
			     int min_count)
{
	unsigned long flags;
	struct early_log *log;

	if (kmemleak_error) {
		/* kmemleak stopped recording, just count the requests */
		crt_early_log++;
		return;
	}

	if (crt_early_log >= ARRAY_SIZE(early_log)) {
		crt_early_log++;
		kmemleak_disable();
		return;
	}

	/*
	 * There is no need for locking since the kernel is still in UP mode
	 * at this stage. Disabling the IRQs is enough.
	 */
	local_irq_save(flags);
	log = &early_log[crt_early_log];
	log->op_type = op_type;
	log->ptr = ptr;
	log->size = size;
	log->min_count = min_count;
	log->trace_len = __save_stack_trace(log->trace);
	crt_early_log++;
	local_irq_restore(flags);
}

/*
 * Log an early allocated block and populate the stack trace.
 */
static void early_alloc(struct early_log *log)
{
	struct kmemleak_object *object;
	unsigned long flags;
	int i;

	if (!kmemleak_enabled || !log->ptr || IS_ERR(log->ptr))
		return;

	/*
	 * RCU locking needed to ensure object is not freed via put_object().
	 */
	rcu_read_lock();
	object = create_object((unsigned long)log->ptr, log->size,
			       log->min_count, GFP_ATOMIC);
	if (!object)
		goto out;
	spin_lock_irqsave(&object->lock, flags);
	for (i = 0; i < log->trace_len; i++)
		object->trace[i] = log->trace[i];
	object->trace_len = log->trace_len;
	spin_unlock_irqrestore(&object->lock, flags);
out:
	rcu_read_unlock();
}

/*
 * Log an early allocated percpu block and populate the stack trace.
 */
static void early_alloc_percpu(struct early_log *log)
{
	unsigned int cpu;
	const void __percpu *ptr = log->ptr;

	for_each_possible_cpu(cpu) {
		log->ptr = per_cpu_ptr(ptr, cpu);
		early_alloc(log);
	}
}

/**
 * kmemleak_alloc - register a newly allocated object
 * @ptr:	pointer to beginning of the object
 * @size:	size of the object
 * @min_count:	minimum number of references to this object. If during memory
 *		scanning a number of references less than @min_count is found,
 *		the object is reported as a memory leak. If @min_count is 0,
 *		the object is never reported as a leak. If @min_count is -1,
 *		the object is ignored (not scanned and not reported as a leak)
 * @gfp:	kmalloc() flags used for kmemleak internal memory allocations
 *
 * This function is called from the kernel allocators when a new object
 * (memory block) is allocated (kmem_cache_alloc, kmalloc, vmalloc etc.).
 */
void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count,
			  gfp_t gfp)
{
	pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count);

	if (kmemleak_enabled && ptr && !IS_ERR(ptr))
		create_object((unsigned long)ptr, size, min_count, gfp);
	else if (kmemleak_early_log)
		log_early(KMEMLEAK_ALLOC, ptr, size, min_count);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc);
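
/*
 * Usage sketch (illustrative only, not from the original file), assuming a
 * hypothetical driver that allocates memory kmemleak cannot otherwise track:
 *
 *	buf = my_private_alloc(len);			// hypothetical helper
 *	kmemleak_alloc(buf, len, 1, GFP_KERNEL);	// track and report leaks
 *	...
 *	kmemleak_free(buf);
 *	my_private_free(buf);				// hypothetical helper
 *
 * A min_count of 0 would register the block for scanning without ever
 * reporting it, and -1 would ignore it entirely, matching the kernel-doc
 * above.
 */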

/**
 * kmemleak_alloc_percpu - register a newly allocated __percpu object
 * @ptr:	__percpu pointer to beginning of the object
 * @size:	size of the object
 * @gfp:	flags used for kmemleak internal memory allocations
 *
 * This function is called from the kernel percpu allocator when a new object
 * (memory block) is allocated (alloc_percpu).
 */
void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
				 gfp_t gfp)
{
	unsigned int cpu;

	pr_debug("%s(0x%p, %zu)\n", __func__, ptr, size);

	/*
	 * Percpu allocations are only scanned and not reported as leaks
	 * (min_count is set to 0).
	 */
	if (kmemleak_enabled && ptr && !IS_ERR(ptr))
		for_each_possible_cpu(cpu)
			create_object((unsigned long)per_cpu_ptr(ptr, cpu),
				      size, 0, gfp);
	else if (kmemleak_early_log)
		log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu);

/**
 * kmemleak_free - unregister a previously registered object
 * @ptr:	pointer to beginning of the object
 *
 * This function is called from the kernel allocators when an object (memory
 * block) is freed (kmem_cache_free, kfree, vfree etc.).
 */
void __ref kmemleak_free(const void *ptr)
{
	pr_debug("%s(0x%p)\n", __func__, ptr);

	if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
		delete_object_full((unsigned long)ptr);
	else if (kmemleak_early_log)
		log_early(KMEMLEAK_FREE, ptr, 0, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free);

/**
 * kmemleak_free_part - partially unregister a previously registered object
 * @ptr:	pointer to the beginning or inside the object. This also
 *		represents the start of the range to be freed
 * @size:	size to be unregistered
 *
 * This function is called when only a part of a memory block is freed
 * (usually from the bootmem allocator).
 */
void __ref kmemleak_free_part(const void *ptr, size_t size)
{
	pr_debug("%s(0x%p)\n", __func__, ptr);

	if (kmemleak_enabled && ptr && !IS_ERR(ptr))
		delete_object_part((unsigned long)ptr, size);
	else if (kmemleak_early_log)
		log_early(KMEMLEAK_FREE_PART, ptr, size, 0);
}
EXPORT_SYMBOL_GPL(kmemleak_free_part);

/**
 * kmemleak_free_percpu - unregister a previously registered __percpu object
 * @ptr:	__percpu pointer to beginning of the object
 *
 * This function is called from the kernel percpu allocator when an object
 * (memory block) is freed (free_percpu).
 */
void __ref kmemleak_free_percpu(const void __percpu *ptr)
{