base.c 85 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0
Linus Torvalds's avatar
Linus Torvalds committed
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 *  linux/fs/proc/base.c
 *
 *  Copyright (C) 1991, 1992 Linus Torvalds
 *
 *  proc base directory handling functions
 *
 *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
 *  Instead of using magical inumbers to determine the kind of object
 *  we allocate and fill in-core inodes upon lookup. They don't even
 *  go into icache. We cache the reference to task_struct upon lookup too.
 *  Eventually it should become a filesystem in its own right. We don't
 *  use the rest of procfs anymore.
 *
 *
 *  Changelog:
 *  17-Jan-2005
 *  Allan Bezerra
 *  Bruna Moreira <bruna.moreira@indt.org.br>
 *  Edjard Mota <edjard.mota@indt.org.br>
 *  Ilias Biris <ilias.biris@indt.org.br>
 *  Mauricio Lin <mauricio.lin@indt.org.br>
 *
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *
 *  A new process specific entry (smaps) included in /proc. It shows the
 *  size of rss for each memory area. The maps entry lacks information
 *  about physical memory size (rss) for each mapped file, i.e.,
 *  rss information for executables and library files.
 *  This additional information is useful for any tools that need to know
 *  about physical memory consumption for a process specific library.
 *
 *  Changelog:
 *  21-Feb-2005
 *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
 *  Pud inclusion in the page table walking.
 *
 *  ChangeLog:
 *  10-Mar-2005
 *  10LE Instituto Nokia de Tecnologia - INdT:
 *  A better way to walk through the page table as suggested by Hugh Dickins.
 *
 *  Simo Piiroinen <simo.piiroinen@nokia.com>:
 *  Smaps information related to shared, private, clean and dirty pages.
 *
 *  Paul Mundt <paul.mundt@nokia.com>:
 *  Overall revision about smaps.
 */

51
#include <linux/uaccess.h>
Linus Torvalds's avatar
Linus Torvalds committed
52
53
54
55
56

#include <linux/errno.h>
#include <linux/time.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
57
#include <linux/task_io_accounting_ops.h>
Linus Torvalds's avatar
Linus Torvalds committed
58
#include <linux/init.h>
59
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
60
#include <linux/file.h>
Al Viro's avatar
Al Viro committed
61
#include <linux/fdtable.h>
Linus Torvalds's avatar
Linus Torvalds committed
62
63
64
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
65
#include <linux/mnt_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
66
#include <linux/mm.h>
67
#include <linux/swap.h>
68
#include <linux/rcupdate.h>
Linus Torvalds's avatar
Linus Torvalds committed
69
#include <linux/kallsyms.h>
Ken Chen's avatar
Ken Chen committed
70
#include <linux/stacktrace.h>
71
#include <linux/resource.h>
Kees Cook's avatar
Kees Cook committed
72
#include <linux/module.h>
Linus Torvalds's avatar
Linus Torvalds committed
73
74
75
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
76
#include <linux/tracehook.h>
Andrew Morton's avatar
Andrew Morton committed
77
#include <linux/printk.h>
Alexey Dobriyan's avatar
Alexey Dobriyan committed
78
#include <linux/cache.h>
79
#include <linux/cgroup.h>
Linus Torvalds's avatar
Linus Torvalds committed
80
81
#include <linux/cpuset.h>
#include <linux/audit.h>
Al Viro's avatar
Al Viro committed
82
#include <linux/poll.h>
83
#include <linux/nsproxy.h>
84
#include <linux/oom.h>
85
#include <linux/elf.h>
86
#include <linux/pid_namespace.h>
87
#include <linux/user_namespace.h>
88
#include <linux/fs_struct.h>
89
#include <linux/slab.h>
90
#include <linux/sched/autogroup.h>
91
#include <linux/sched/mm.h>
92
#include <linux/sched/coredump.h>
93
#include <linux/sched/debug.h>
94
#include <linux/sched/stat.h>
95
#include <linux/flex_array.h>
96
#include <linux/posix-timers.h>
97
#include <trace/events/oom.h>
Linus Torvalds's avatar
Linus Torvalds committed
98
#include "internal.h"
99
#include "fd.h"
Linus Torvalds's avatar
Linus Torvalds committed
100

101
102
#include "../../lib/kstrtox.h"

103
104
105
106
107
108
109
110
111
112
/* NOTE:
 *	Implementing inode permission operations in /proc is almost
 *	certainly an error.  Permission checks need to happen during
 *	each system call not at open time.  The reason is that most of
 *	what we wish to check for permissions in /proc varies at runtime.
 *
 *	The classic example of a problem is opening file descriptors
 *	in /proc for a task before it execs a suid executable.
 */

Alexey Dobriyan's avatar
Alexey Dobriyan committed
113
114
/*
 * Link counts held as u8 and marked __ro_after_init.
 * NOTE(review): presumably the directory link counts for the per-thread
 * (tid) and per-process (tgid) /proc directories, computed with
 * pid_entry_nlink() during init -- the assignment is not visible in this
 * part of the file; confirm against the initialiser.
 */
static u8 nlink_tid __ro_after_init;
static u8 nlink_tgid __ro_after_init;
115

Linus Torvalds's avatar
Linus Torvalds committed
116
struct pid_entry {
117
	const char *name;
118
	unsigned int len;
Al Viro's avatar
Al Viro committed
119
	umode_t mode;
120
	const struct inode_operations *iop;
121
	const struct file_operations *fop;
122
	union proc_op op;
Linus Torvalds's avatar
Linus Torvalds committed
123
124
};

125
/*
 * Build one struct pid_entry initializer.
 *
 * .len is computed with sizeof(NAME) - 1, so NAME has to be a string
 * literal (or char array), never a plain pointer.  OP is a braced
 * union proc_op initializer and therefore cannot be parenthesized; the
 * other arguments are.
 */
#define NOD(NAME, MODE, IOP, FOP, OP) {			\
	.name = (NAME),					\
	.len  = sizeof(NAME) - 1,			\
	.mode = (MODE),					\
	.iop  = (IOP),					\
	.fop  = (FOP),					\
	.op   = OP,					\
}

Alexey Dobriyan's avatar
Alexey Dobriyan committed
134
135
136
/* Directory entry: S_IFDIR | MODE, with its own inode and file operations. */
#define DIR(NAME, MODE, iops, fops)	\
	NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
/* Symlink entry (S_IFLNK | S_IRWXUGO) resolved through the get_link callback. */
#define LNK(NAME, get_link)					\
	NOD(NAME, (S_IFLNK|S_IRWXUGO),				\
		&proc_pid_link_inode_operations, NULL,		\
		{ .proc_get_link = get_link } )
/* Regular file entry served by its own file_operations. */
#define REG(NAME, MODE, fops)				\
	NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
/* Regular file entry rendered by a single proc_show callback. */
#define ONE(NAME, MODE, show)				\
	NOD(NAME, (S_IFREG|(MODE)),			\
		NULL, &proc_single_file_operations,	\
		{ .proc_show = show } )
Linus Torvalds's avatar
Linus Torvalds committed
146

147
148
149
150
/*
 * Count the number of hardlinks for the pid_entry table, excluding the .
 * and .. links.
 */
151
static unsigned int __init pid_entry_nlink(const struct pid_entry *entries,
152
153
154
155
156
	unsigned int n)
{
	unsigned int i;
	unsigned int count;

157
	count = 2;
158
159
160
161
162
163
164
165
	for (i = 0; i < n; ++i) {
		if (S_ISDIR(entries[i].mode))
			++count;
	}

	return count;
}

166
static int get_task_root(struct task_struct *task, struct path *root)
Linus Torvalds's avatar
Linus Torvalds committed
167
{
Hugh Dickins's avatar
Hugh Dickins committed
168
169
	int result = -ENOENT;

170
	task_lock(task);
171
172
	if (task->fs) {
		get_fs_root(task->fs, root);
Hugh Dickins's avatar
Hugh Dickins committed
173
174
		result = 0;
	}
175
	task_unlock(task);
Hugh Dickins's avatar
Hugh Dickins committed
176
	return result;
177
178
}

179
static int proc_cwd_link(struct dentry *dentry, struct path *path)
180
{
181
	struct task_struct *task = get_proc_task(d_inode(dentry));
182
	int result = -ENOENT;
183
184

	if (task) {
185
186
187
188
189
190
		task_lock(task);
		if (task->fs) {
			get_fs_pwd(task->fs, path);
			result = 0;
		}
		task_unlock(task);
191
192
		put_task_struct(task);
	}
Linus Torvalds's avatar
Linus Torvalds committed
193
194
195
	return result;
}

196
static int proc_root_link(struct dentry *dentry, struct path *path)
Linus Torvalds's avatar
Linus Torvalds committed
197
{
198
	struct task_struct *task = get_proc_task(d_inode(dentry));
Linus Torvalds's avatar
Linus Torvalds committed
199
	int result = -ENOENT;
200
201

	if (task) {
202
		result = get_task_root(task, path);
203
204
		put_task_struct(task);
	}
Linus Torvalds's avatar
Linus Torvalds committed
205
206
207
	return result;
}

208
/*
 * Copy up to @count bytes of @mm's command line, starting at offset
 * *@ppos from arg_start, into the user buffer @buf.
 *
 * The data is fetched one page at a time through access_remote_vm().
 * Returns the number of bytes copied (0 at or past the end, or when the
 * mm has no command line yet), -ENOMEM if the bounce page cannot be
 * allocated, or -EFAULT if nothing at all could be copied to @buf.
 * *@ppos is not updated here; the caller does that.
 */
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
			      size_t count, loff_t *ppos)
{
	unsigned long arg_start, arg_end, env_start, env_end;
	unsigned long pos, len;
	char *page;

	/* Check if process spawned far enough to have cmdline. */
	if (!mm->env_end)
		return 0;

	/* Take a consistent snapshot of the boundaries. */
	spin_lock(&mm->arg_lock);
	arg_start = mm->arg_start;
	arg_end = mm->arg_end;
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	if (arg_start >= arg_end)
		return 0;

	/*
	 * We have traditionally allowed the user to re-write
	 * the argument strings and overflow the end result
	 * into the environment section. But only do that if
	 * the environment area is contiguous to the arguments.
	 */
	if (env_start != arg_end || env_start >= env_end)
		env_start = env_end = arg_end;

	/* We're not going to care if "*ppos" has high bits set */
	pos = arg_start + *ppos;

	/* .. but we do check the result is in the proper range */
	if (pos < arg_start || pos >= env_end)
		return 0;

	/* .. and we never go past env_end */
	if (env_end - pos < count)
		count = env_end - pos;

	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	len = 0;
	while (count) {
		int got;
		size_t size = min_t(size_t, PAGE_SIZE, count);

		got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
		if (got <= 0)
			break;

		/* Don't walk past a NUL character once you hit arg_end */
		if (pos + got >= arg_end) {
			int n = 0;

			/*
			 * If we started before 'arg_end' but ended up
			 * at or after it, we start the NUL character
			 * check at arg_end-1 (where we expect the normal
			 * EOF to be).
			 *
			 * NOTE! This is smaller than 'got', because
			 * pos + got >= arg_end
			 */
			if (pos < arg_end)
				n = arg_end - pos - 1;

			/*
			 * Cut off at first NUL after 'n'
			 *
			 * NOTE(review): when page[n] itself is NUL,
			 * strnlen() yields 0 and 'got' becomes 'n', so
			 * that NUL byte is not part of the data handed
			 * back -- confirm this is the intended output
			 * format.
			 */
			got = n + strnlen(page+n, got-n);
			if (!got)
				break;
		}

		/* copy_to_user() returns the number of bytes NOT copied. */
		got -= copy_to_user(buf, page, got);
		if (unlikely(!got)) {
			/* Report the fault only if no data was delivered. */
			if (!len)
				len = -EFAULT;
			break;
		}
		pos += got;
		buf += got;
		len += got;
		count -= got;
	}

	free_page((unsigned long)page);
	return len;
}

300
301
302
303
304
305
306
307
308
309
310
/*
 * Read @tsk's command line via get_mm_cmdline().
 *
 * Returns 0 when get_task_mm() yields no mm; otherwise whatever
 * get_mm_cmdline() returns.  The mm reference is dropped before
 * returning.
 */
static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf,
				size_t count, loff_t *pos)
{
	struct mm_struct *mm;
	ssize_t ret;

	mm = get_task_mm(tsk);
	if (!mm)
		return 0;

	ret = get_mm_cmdline(mm, buf, count, pos);
	mmput(mm);
	return ret;
}

/*
 * ->read() handler for the cmdline file.
 *
 * Returns -ESRCH when the task is gone, otherwise the result of
 * get_task_cmdline().  *@pos is advanced only by a positive byte count.
 * A negative *@pos on entry trips BUG_ON().
 */
static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf,
				     size_t count, loff_t *pos)
{
	struct task_struct *tsk;
	ssize_t ret;

	BUG_ON(*pos < 0);

	tsk = get_proc_task(file_inode(file));
	if (!tsk)
		return -ESRCH;
	ret = get_task_cmdline(tsk, buf, count, pos);
	put_task_struct(tsk);
	if (ret > 0)
		*pos += ret;
	return ret;
}

333
334
335
336
337
/* File operations for the cmdline file: custom read, generic seeking. */
static const struct file_operations proc_pid_cmdline_ops = {
	.read	= proc_pid_cmdline_read,
	.llseek	= generic_file_llseek,
};

Linus Torvalds's avatar
Linus Torvalds committed
338
339
340
341
342
#ifdef CONFIG_KALLSYMS
/*
 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
 * Returns the resolved symbol.  If that fails, simply return the address.
 */
343
344
static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
345
{
Alexey Dobriyan's avatar
Alexey Dobriyan committed
346
	unsigned long wchan;
347
	char symname[KSYM_NAME_LEN];
Linus Torvalds's avatar
Linus Torvalds committed
348

349
350
	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto print0;
Linus Torvalds's avatar
Linus Torvalds committed
351

352
353
	wchan = get_wchan(task);
	if (wchan && !lookup_symbol_name(wchan, symname)) {
354
		seq_puts(m, symname);
355
356
		return 0;
	}
357

358
359
print0:
	seq_putc(m, '0');
360
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
361
362
363
}
#endif /* CONFIG_KALLSYMS */

364
365
366
367
368
static int lock_trace(struct task_struct *task)
{
	int err = mutex_lock_killable(&task->signal->cred_guard_mutex);
	if (err)
		return err;
369
	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) {
370
371
372
373
374
375
376
377
378
379
380
		mutex_unlock(&task->signal->cred_guard_mutex);
		return -EPERM;
	}
	return 0;
}

/* Release the cred_guard_mutex taken by a successful lock_trace(). */
static void unlock_trace(struct task_struct *task)
{
	mutex_unlock(&task->signal->cred_guard_mutex);
}

Ken Chen's avatar
Ken Chen committed
381
382
383
384
385
386
387
388
389
#ifdef CONFIG_STACKTRACE

#define MAX_STACK_TRACE_DEPTH	64

static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
{
	struct stack_trace trace;
	unsigned long *entries;
390
	int err;
Ken Chen's avatar
Ken Chen committed
391

392
393
	entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
				GFP_KERNEL);
Ken Chen's avatar
Ken Chen committed
394
395
396
397
398
399
400
401
	if (!entries)
		return -ENOMEM;

	trace.nr_entries	= 0;
	trace.max_entries	= MAX_STACK_TRACE_DEPTH;
	trace.entries		= entries;
	trace.skip		= 0;

402
403
	err = lock_trace(task);
	if (!err) {
404
405
		unsigned int i;

406
407
408
		save_stack_trace_tsk(task, &trace);

		for (i = 0; i < trace.nr_entries; i++) {
409
			seq_printf(m, "[<0>] %pB\n", (void *)entries[i]);
410
411
		}
		unlock_trace(task);
Ken Chen's avatar
Ken Chen committed
412
413
414
	}
	kfree(entries);

415
	return err;
Ken Chen's avatar
Ken Chen committed
416
417
418
}
#endif

419
#ifdef CONFIG_SCHED_INFO
Linus Torvalds's avatar
Linus Torvalds committed
420
421
422
/*
 * Provides /proc/PID/schedstat
 */
423
424
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
			      struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
425
{
426
427
428
429
	if (unlikely(!sched_info_on()))
		seq_printf(m, "0 0 0\n");
	else
		seq_printf(m, "%llu %llu %lu\n",
430
431
432
433
434
		   (unsigned long long)task->se.sum_exec_runtime,
		   (unsigned long long)task->sched_info.run_delay,
		   task->sched_info.pcount);

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
435
436
437
}
#endif

Arjan van de Ven's avatar
Arjan van de Ven committed
438
439
440
441
#ifdef CONFIG_LATENCYTOP
static int lstats_show_proc(struct seq_file *m, void *v)
{
	int i;
442
443
	struct inode *inode = m->private;
	struct task_struct *task = get_proc_task(inode);
Arjan van de Ven's avatar
Arjan van de Ven committed
444

445
446
447
	if (!task)
		return -ESRCH;
	seq_puts(m, "Latency Top version : v0.1\n");
Arjan van de Ven's avatar
Arjan van de Ven committed
448
	for (i = 0; i < 32; i++) {
449
450
		struct latency_record *lr = &task->latency_record[i];
		if (lr->backtrace[0]) {
Arjan van de Ven's avatar
Arjan van de Ven committed
451
			int q;
452
453
			seq_printf(m, "%i %li %li",
				   lr->count, lr->time, lr->max);
Arjan van de Ven's avatar
Arjan van de Ven committed
454
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
455
456
				unsigned long bt = lr->backtrace[q];
				if (!bt)
Arjan van de Ven's avatar
Arjan van de Ven committed
457
					break;
458
				if (bt == ULONG_MAX)
Arjan van de Ven's avatar
Arjan van de Ven committed
459
					break;
460
				seq_printf(m, " %ps", (void *)bt);
Arjan van de Ven's avatar
Arjan van de Ven committed
461
			}
462
			seq_putc(m, '\n');
Arjan van de Ven's avatar
Arjan van de Ven committed
463
464
465
		}

	}
466
	put_task_struct(task);
Arjan van de Ven's avatar
Arjan van de Ven committed
467
468
469
470
471
	return 0;
}

static int lstats_open(struct inode *inode, struct file *file)
{
472
	return single_open(file, lstats_show_proc, inode);
473
474
}

Arjan van de Ven's avatar
Arjan van de Ven committed
475
476
477
/*
 * ->write(): any write clears the task's latency records.
 *
 * The written data (@buf) is never read.  Returns -ESRCH when the task
 * is gone, otherwise @count so the whole write is reported as consumed.
 */
static ssize_t lstats_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *offs)
{
	struct task_struct *task = get_proc_task(file_inode(file));

	if (!task)
		return -ESRCH;
	clear_all_latency_tracing(task);
	put_task_struct(task);

	return count;
}

static const struct file_operations proc_lstats_operations = {
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
493
	.release	= single_release,
Arjan van de Ven's avatar
Arjan van de Ven committed
494
495
496
497
};

#endif

498
499
static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns,
			  struct pid *pid, struct task_struct *task)
Linus Torvalds's avatar
Linus Torvalds committed
500
{
501
	unsigned long totalpages = totalram_pages + total_swap_pages;
502
	unsigned long points = 0;
Linus Torvalds's avatar
Linus Torvalds committed
503

504
505
	points = oom_badness(task, NULL, NULL, totalpages) *
					1000 / totalpages;
506
507
508
	seq_printf(m, "%lu\n", points);

	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
509
510
}

511
/* Display strings for one resource limit. */
struct limit_names {
	const char *name;	/* text for the "Limit" column */
	const char *unit;	/* text for the "Units" column, NULL for none */
};

static const struct limit_names lnames[RLIM_NLIMITS] = {
517
	[RLIMIT_CPU] = {"Max cpu time", "seconds"},
518
519
520
521
522
523
524
525
526
527
528
529
530
531
	[RLIMIT_FSIZE] = {"Max file size", "bytes"},
	[RLIMIT_DATA] = {"Max data size", "bytes"},
	[RLIMIT_STACK] = {"Max stack size", "bytes"},
	[RLIMIT_CORE] = {"Max core file size", "bytes"},
	[RLIMIT_RSS] = {"Max resident set", "bytes"},
	[RLIMIT_NPROC] = {"Max processes", "processes"},
	[RLIMIT_NOFILE] = {"Max open files", "files"},
	[RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"},
	[RLIMIT_AS] = {"Max address space", "bytes"},
	[RLIMIT_LOCKS] = {"Max file locks", "locks"},
	[RLIMIT_SIGPENDING] = {"Max pending signals", "signals"},
	[RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"},
	[RLIMIT_NICE] = {"Max nice priority", NULL},
	[RLIMIT_RTPRIO] = {"Max realtime priority", NULL},
532
	[RLIMIT_RTTIME] = {"Max realtime timeout", "us"},
533
534
535
};

/* Display limits for a process */
536
537
static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
			   struct pid *pid, struct task_struct *task)
538
539
540
541
542
543
{
	unsigned int i;
	unsigned long flags;

	struct rlimit rlim[RLIM_NLIMITS];

544
	if (!lock_task_sighand(task, &flags))
545
546
547
548
549
550
551
		return 0;
	memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS);
	unlock_task_sighand(task, &flags);

	/*
	 * print the file header
	 */
552
       seq_printf(m, "%-25s %-20s %-20s %-10s\n",
553
		  "Limit", "Soft Limit", "Hard Limit", "Units");
554
555
556

	for (i = 0; i < RLIM_NLIMITS; i++) {
		if (rlim[i].rlim_cur == RLIM_INFINITY)
557
			seq_printf(m, "%-25s %-20s ",
558
				   lnames[i].name, "unlimited");
559
		else
560
			seq_printf(m, "%-25s %-20lu ",
561
				   lnames[i].name, rlim[i].rlim_cur);
562
563

		if (rlim[i].rlim_max == RLIM_INFINITY)
564
			seq_printf(m, "%-20s ", "unlimited");
565
		else
566
			seq_printf(m, "%-20lu ", rlim[i].rlim_max);
567
568

		if (lnames[i].unit)
569
			seq_printf(m, "%-10s\n", lnames[i].unit);
570
		else
571
			seq_putc(m, '\n');
572
573
	}

574
	return 0;
575
576
}

Roland McGrath's avatar
Roland McGrath committed
577
#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
578
579
/*
 * Print the system call @task is currently in.
 *
 * Output is "running" when task_current_syscall() returns non-zero,
 * "nr sp pc" when the reported number is negative, and
 * "nr arg0..arg5 sp pc" otherwise.  Returns the lock_trace() error if
 * the trace lock cannot be taken, else 0.
 */
static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
			    struct pid *pid, struct task_struct *task)
{
	long nr;
	unsigned long args[6], sp, pc;
	int res;

	res = lock_trace(task);
	if (res)
		return res;

	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
		seq_puts(m, "running\n");
	else if (nr < 0)
		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
	else
		seq_printf(m,
		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
		       nr,
		       args[0], args[1], args[2], args[3], args[4], args[5],
		       sp, pc);
	unlock_trace(task);

	return 0;
}
#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */

Linus Torvalds's avatar
Linus Torvalds committed
605
606
607
608
609
/************************************************************************/
/*                       Here the fs part begins                        */
/************************************************************************/

/* permission checks */
610
static int proc_fd_access_allowed(struct inode *inode)
Linus Torvalds's avatar
Linus Torvalds committed
611
{
612
613
	struct task_struct *task;
	int allowed = 0;
614
615
616
	/* Allow access to a task's file descriptors if it is us or we
	 * may use ptrace attach to the process and find out that
	 * information.
617
618
	 */
	task = get_proc_task(inode);
619
	if (task) {
620
		allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
621
		put_task_struct(task);
622
	}
623
	return allowed;
Linus Torvalds's avatar
Linus Torvalds committed
624
625
}

626
int proc_setattr(struct dentry *dentry, struct iattr *attr)
627
628
{
	int error;
629
	struct inode *inode = d_inode(dentry);
630
631
632
633

	if (attr->ia_valid & ATTR_MODE)
		return -EPERM;

634
	error = setattr_prepare(dentry, attr);
Christoph Hellwig's avatar
Christoph Hellwig committed
635
636
637
638
639
640
	if (error)
		return error;

	setattr_copy(inode, attr);
	mark_inode_dirty(inode);
	return 0;
641
642
}

643
644
645
646
647
648
649
650
651
652
653
654
/*
 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
 * or euid/egid (for hide_pid_min=2)?
 */
static bool has_pid_permissions(struct pid_namespace *pid,
				 struct task_struct *task,
				 int hide_pid_min)
{
	if (pid->hide_pid < hide_pid_min)
		return true;
	if (in_group_p(pid->pid_gid))
		return true;
655
	return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS);
656
657
658
659
660
}


static int proc_pid_permission(struct inode *inode, int mask)
{
661
	struct pid_namespace *pid = proc_pid_ns(inode);
662
663
664
665
	struct task_struct *task;
	bool has_perms;

	task = get_proc_task(inode);
666
667
	if (!task)
		return -ESRCH;
668
	has_perms = has_pid_permissions(pid, task, HIDEPID_NO_ACCESS);
669
670
671
	put_task_struct(task);

	if (!has_perms) {
672
		if (pid->hide_pid == HIDEPID_INVISIBLE) {
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
			/*
			 * Let's make getdents(), stat(), and open()
			 * consistent with each other.  If a process
			 * may not stat() a file, it shouldn't be seen
			 * in procfs at all.
			 */
			return -ENOENT;
		}

		return -EPERM;
	}
	return generic_permission(inode, mask);
}



689
static const struct inode_operations proc_def_inode_operations = {
690
691
692
	.setattr	= proc_setattr,
};

693
694
695
/*
 * seq_file show callback shared by all ONE() entries.
 *
 * Looks up the task for the inode stored in m->private and calls the
 * entry's op.proc_show callback on it.  Returns -ESRCH when the task
 * is gone, otherwise the callback's return value.
 */
static int proc_single_show(struct seq_file *m, void *v)
{
	struct inode *inode = m->private;
	struct pid_namespace *ns = proc_pid_ns(inode);
	struct pid *pid = proc_pid(inode);
	struct task_struct *task;
	int ret;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return -ESRCH;

	ret = PROC_I(inode)->op.proc_show(m, ns, pid, task);

	put_task_struct(task);
	return ret;
}

static int proc_single_open(struct inode *inode, struct file *filp)
{
Jovi Zhang's avatar
Jovi Zhang committed
713
	return single_open(filp, proc_single_show, inode);
714
715
716
717
718
719
720
721
722
}

/* File operations used by ONE() entries: read-only, seq_file based. */
static const struct file_operations proc_single_file_operations = {
	.open		= proc_single_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

723
724

struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
Linus Torvalds's avatar
Linus Torvalds committed
725
{
726
727
	struct task_struct *task = get_proc_task(inode);
	struct mm_struct *mm = ERR_PTR(-ESRCH);
728

729
	if (task) {
730
		mm = mm_access(task, mode | PTRACE_MODE_FSCREDS);
731
		put_task_struct(task);
732

733
734
		if (!IS_ERR_OR_NULL(mm)) {
			/* ensure this mm_struct can't be freed */
Vegard Nossum's avatar
Vegard Nossum committed
735
			mmgrab(mm);
736
737
738
739
740
741
742
743
744
745
746
			/* but do not pin its memory */
			mmput(mm);
		}
	}

	return mm;
}

/*
 * Common ->open() helper: stash the mm from proc_mem_open() in
 * file->private_data.
 *
 * Returns the error encoded in the pointer, or 0.  A NULL mm is not an
 * error here and is stored as-is; the read/write paths check for it.
 */
static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
{
	struct mm_struct *mm = proc_mem_open(inode, mode);

	if (IS_ERR(mm))
		return PTR_ERR(mm);

	file->private_data = mm;
	return 0;
}

755
756
static int mem_open(struct inode *inode, struct file *file)
{
757
758
759
760
761
762
	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);

	/* OK to pass negative loff_t, we can catch out-of-range */
	file->f_mode |= FMODE_UNSIGNED_OFFSET;

	return ret;
763
764
}

765
766
/*
 * Read from or write to the address space stored in file->private_data.
 *
 * @write selects the direction: non-zero copies from @buf into the
 * target mm, zero copies from the target mm into @buf.  Data moves
 * through a single bounce page, at most PAGE_SIZE bytes per iteration,
 * starting at address *@ppos.
 *
 * Returns the number of bytes transferred; 0 when there is no mm or its
 * user count already dropped to zero; -ENOMEM if the bounce page cannot
 * be allocated; -EFAULT on a user-copy failure; -EIO when the very
 * first access_remote_vm() transfers nothing.  *@ppos is set to the
 * address reached, except on the early-return/"free" paths.
 */
static ssize_t mem_rw(struct file *file, char __user *buf,
			size_t count, loff_t *ppos, int write)
{
	struct mm_struct *mm = file->private_data;
	unsigned long addr = *ppos;
	ssize_t copied;
	char *page;
	unsigned int flags;

	if (!mm)
		return 0;

	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	copied = 0;
	if (!mmget_not_zero(mm))
		goto free;

	flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);

	while (count > 0) {
		int this_len = min_t(int, count, PAGE_SIZE);

		if (write && copy_from_user(page, buf, this_len)) {
			copied = -EFAULT;
			break;
		}

		this_len = access_remote_vm(mm, addr, page, this_len, flags);
		if (!this_len) {
			/* Only an error if nothing was transferred so far. */
			if (!copied)
				copied = -EIO;
			break;
		}

		if (!write && copy_to_user(buf, page, this_len)) {
			copied = -EFAULT;
			break;
		}

		buf += this_len;
		addr += this_len;
		copied += this_len;
		count -= this_len;
	}
	*ppos = addr;

	mmput(mm);
free:
	free_page((unsigned long) page);
	return copied;
}

820
821
822
823
824
825
826
827
828
829
830
831
/* ->read() for the mem file: mem_rw() in read direction (write == 0). */
static ssize_t mem_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	return mem_rw(file, buf, count, ppos, 0);
}

/*
 * ->write() for the mem file: mem_rw() in write direction (write == 1).
 * The const qualifier on @buf is cast away only to fit mem_rw()'s shared
 * signature.
 */
static ssize_t mem_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos)
{
	return mem_rw(file, (char __user*)buf, count, ppos, 1);
}

832
/*
 * ->llseek() for the mem file.
 *
 * Only origin 0 (absolute) and origin 1 (relative to the current
 * position) are accepted; anything else yields -EINVAL.  The new
 * position is stored without range checking, and
 * force_successful_syscall_return() is called before it is returned.
 */
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
	switch (orig) {
	case 0:
		file->f_pos = offset;
		break;
	case 1:
		file->f_pos += offset;
		break;
	default:
		return -EINVAL;
	}
	force_successful_syscall_return();
	return file->f_pos;
}

848
849
850
/*
 * ->release(): drop the mm reference stored in file->private_data by
 * __mem_open(), if there is one.  Always returns 0.
 */
static int mem_release(struct inode *inode, struct file *file)
{
	struct mm_struct *mm = file->private_data;
	if (mm)
		mmdrop(mm);
	return 0;
}

856
static const struct file_operations proc_mem_operations = {
Linus Torvalds's avatar
Linus Torvalds committed
857
858
859
860
	.llseek		= mem_lseek,
	.read		= mem_read,
	.write		= mem_write,
	.open		= mem_open,
861
	.release	= mem_release,
Linus Torvalds's avatar
Linus Torvalds committed
862
863
};

864
865
866
867
868
/* ->open() for the environ file: requires PTRACE_MODE_READ access to the mm. */
static int environ_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ);
}

869
870
871
872
873
/*
 * ->read() for the environ file.
 *
 * Copies the bytes between env_start and env_end of the mm stored in
 * file->private_data to @buf, starting at offset *@ppos, one page at a
 * time through a bounce page.
 *
 * Returns the number of bytes copied; 0 when there is no mm, no
 * environment yet, or the mm's user count already dropped to zero;
 * -ENOMEM if the bounce page cannot be allocated; -EFAULT on a
 * user-copy failure.  Note that a failing access_remote_vm() replaces
 * the running total with its own (<= 0) return value.
 */
static ssize_t environ_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	char *page;
	unsigned long src = *ppos;
	int ret = 0;
	struct mm_struct *mm = file->private_data;
	unsigned long env_start, env_end;

	/* Ensure the process spawned far enough to have an environment. */
	if (!mm || !mm->env_end)
		return 0;

	page = (char *)__get_free_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	if (!mmget_not_zero(mm))
		goto free;

	/* Take a consistent snapshot of the environment boundaries. */
	spin_lock(&mm->arg_lock);
	env_start = mm->env_start;
	env_end = mm->env_end;
	spin_unlock(&mm->arg_lock);

	while (count > 0) {
		size_t this_len, max_len;
		int retval;

		/* Offset is at or past the end of the environment. */
		if (src >= (env_end - env_start))
			break;

		this_len = env_end - (env_start + src);

		max_len = min_t(size_t, PAGE_SIZE, count);
		this_len = min(max_len, this_len);

		retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON);

		if (retval <= 0) {
			ret = retval;
			break;
		}

		if (copy_to_user(buf, page, retval)) {
			ret = -EFAULT;
			break;
		}

		ret += retval;
		src += retval;
		buf += retval;
		count -= retval;
	}
	*ppos = src;
	mmput(mm);

free:
	free_page((unsigned long) page);
	return ret;
}

static const struct file_operations proc_environ_operations = {
933
	.open		= environ_open,
934
	.read		= environ_read,
935
	.llseek		= generic_file_llseek,
936
	.release	= mem_release,
937
938
};

939
940
941
942
943
944
945
946
947
948
/*
 * ->open() for the auxv file: requires read-level ptrace access to the mm.
 * NOTE(review): proc_mem_open() already ORs in PTRACE_MODE_FSCREDS, so the
 * _FSCREDS part of the mode passed here looks redundant (environ_open()
 * passes plain PTRACE_MODE_READ) -- confirm before unifying.
 */
static int auxv_open(struct inode *inode, struct file *file)
{
	return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS);
}

/*
 * ->read() for the auxv file.
 *
 * Exposes mm->saved_auxv up to and including the terminating pair whose
 * first word is 0 (AT_NULL).  Returns 0 when no mm is attached to the
 * file, otherwise the result of simple_read_from_buffer().
 */
static ssize_t auxv_read(struct file *file, char __user *buf,
			size_t count, loff_t *ppos)
{
	struct mm_struct *mm = file->private_data;
	unsigned int nwords = 0;

	if (!mm)
		return 0;
	/* Entries are word pairs; count them including the AT_NULL pair. */
	do {
		nwords += 2;
	} while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
	return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
				       nwords * sizeof(mm->saved_auxv[0]));
}

/* File operations for the auxv file. */
static const struct file_operations proc_auxv_operations = {
	.open		= auxv_open,
	.read		= auxv_read,
	.llseek		= generic_file_llseek,
	.release	= mem_release,
};

966
967
968
/*
 * ->read() for the legacy oom_adj file.
 *
 * The value is derived from the task's oom_score_adj: OOM_SCORE_ADJ_MAX
 * maps to OOM_ADJUST_MAX, anything else is scaled by
 * -OOM_DISABLE / OOM_SCORE_ADJ_MAX.  Returns -ESRCH when the task is
 * gone, otherwise the result of simple_read_from_buffer() on the
 * formatted "%d\n" string.
 */
static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	char buffer[PROC_NUMBUF];
	int oom_adj = OOM_ADJUST_MIN;
	size_t len;

	if (!task)
		return -ESRCH;
	if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX)
		oom_adj = OOM_ADJUST_MAX;
	else
		oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) /
			  OOM_SCORE_ADJ_MAX;
	put_task_struct(task);
	len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj);
	return simple_read_from_buffer(buf, count, ppos, buffer, len);
}

986
987
988
/*
 * Apply a new oom_score_adj value to the task behind @file.
 *
 * @file:    proc file whose inode identifies the target task
 * @oom_adj: new value, already expressed on the oom_score_adj scale
 * @legacy:  true when the request came in through the legacy oom_adj file
 *
 * Lowering the value requires CAP_SYS_RESOURCE: the legacy path compares
 * against the task's current oom_score_adj, the non-legacy path against its
 * oom_score_adj_min. On the non-legacy path oom_score_adj_min is also updated
 * when the caller has CAP_SYS_RESOURCE.
 *
 * If the target is not in the middle of a vfork and its mm has more than one
 * user, the same value is propagated to every other process sharing that mm.
 *
 * All updates are serialised by a function-local mutex.
 *
 * Returns 0 on success, -ESRCH if the task is gone, or -EACCES if the caller
 * is not allowed to lower the value.
 */
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{
	static DEFINE_MUTEX(oom_adj_mutex);
	struct mm_struct *mm = NULL;
	struct task_struct *task;
	int err = 0;

	task = get_proc_task(file_inode(file));
	if (!task)
		return -ESRCH;

	mutex_lock(&oom_adj_mutex);
	if (legacy) {
		if (oom_adj < task->signal->oom_score_adj &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
		/*
		 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
		 * /proc/pid/oom_score_adj instead.
		 */
		pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
			  current->comm, task_pid_nr(current), task_pid_nr(task),
			  task_pid_nr(task));
	} else {
		if ((short)oom_adj < task->signal->oom_score_adj_min &&
				!capable(CAP_SYS_RESOURCE)) {
			err = -EACCES;
			goto err_unlock;
		}
	}

	/*
	 * Make sure we will check other processes sharing the mm if this is
	 * not vfork which wants its own oom_score_adj.
	 * Pin the mm so it doesn't go away and get reused after task_unlock.
	 */
	if (!task->vfork_done) {
		struct task_struct *p = find_lock_task_mm(task);

		if (p) {
			if (atomic_read(&p->mm->mm_users) > 1) {
				mm = p->mm;
				mmgrab(mm);
			}
			task_unlock(p);
		}
	}

	task->signal->oom_score_adj = oom_adj;
	if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
		task->signal->oom_score_adj_min = (short)oom_adj;
	trace_oom_score_adj_update(task);

	if (mm) {
		struct task_struct *p;

		/* Propagate the new value to every other process using this mm. */
		rcu_read_lock();
		for_each_process(p) {
			/* The target's own thread group was updated above. */
			if (same_thread_group(task, p))
				continue;

			/* do not touch kernel threads or the global init */
			if (p->flags & PF_KTHREAD || is_global_init(p))
				continue;

			task_lock(p);
			if (!p->vfork_done && process_shares_mm(p, mm)) {
				pr_info("updating oom_score_adj for %d (%s) from %d to %d because it shares mm with %d (%s). Report if this is unexpected.\n",
						task_pid_nr(p), p->comm,
						p->signal->oom_score_adj, oom_adj,
						task_pid_nr(task), task->comm);
				p->signal->oom_score_adj = oom_adj;
				if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE))
					p->signal->oom_score_adj_min = (short)oom_adj;
			}
			task_unlock(p);
		}
		rcu_read_unlock();
		/* Drop the reference taken by mmgrab() above. */
		mmdrop(mm);
	}
err_unlock:
	mutex_unlock(&oom_adj_mutex);
	put_task_struct(task);
	return err;
}
1073

1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
/*
 * /proc/pid/oom_adj exists solely for backwards compatibility with previous
 * kernels.  The effective policy is defined by oom_score_adj, which has a
 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
 * Values written to oom_adj are simply mapped linearly to oom_score_adj.
 * Processes that become oom disabled via oom_adj will still be oom disabled
 * with this implementation.
 *
 * oom_adj cannot be removed since existing userspace binaries use it.
 */
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
/*
 * Write handler for the legacy oom_adj file.
 *
 * Parses an integer from at most PROC_NUMBUF - 1 bytes of user input,
 * validates it against the legacy range (OOM_ADJUST_MIN..OOM_ADJUST_MAX, or
 * the special value OOM_DISABLE), rescales it to the oom_score_adj range and
 * hands it to __set_oom_adj() with the legacy flag set.
 *
 * Returns the number of bytes consumed (which may be less than the caller's
 * @count, since input is truncated to the local buffer) on success, or a
 * negative errno: -EFAULT if the user buffer cannot be read, the
 * kstrtoint() error if the text is not a valid integer, -EINVAL if the
 * value is out of range, or whatever __set_oom_adj() reports.
 */
static ssize_t oom_adj_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	char buffer[PROC_NUMBUF];
	int oom_adj;
	int err;

	/* Zero-fill so the copied text is always NUL-terminated. */
	memset(buffer, 0, sizeof(buffer));
	if (count > sizeof(buffer) - 1)
		count = sizeof(buffer) - 1;
	if (copy_from_user(buffer, buf, count)) {
		err = -EFAULT;
		goto out;
	}

	err = kstrtoint(strstrip(buffer), 0, &oom_adj);
	if (err)
		goto out;
	if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) &&
	     oom_adj != OOM_DISABLE) {
		err = -EINVAL;
		goto out;
	}

	/*
	 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
	 * value is always attainable.
	 */
	if (oom_adj == OOM_ADJUST_MAX)
		oom_adj = OOM_SCORE_ADJ_MAX;
	else
		oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE;

	err = __set_oom_adj(file, oom_adj, true);
out:
	return err < 0 ? err : count;
}

/*
 * File operations table for the legacy oom_adj file: oom_adj_read() and
 * oom_adj_write() handle access, generic_file_llseek() handles seeking.
 * No open or release hook is installed.
 */
static const struct file_operations proc_oom_adj_operations = {
	.llseek		= generic_file_llseek,
	.write		= oom_adj_write,
	.read		= oom_adj_read,
};