/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __KVM_HOST_H
#define __KVM_HOST_H

#include <linux/types.h>
#include <linux/hardirq.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/sched/stat.h>
#include <linux/bug.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/preempt.h>
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/rcupdate.h>
#include <linux/ratelimit.h>
#include <linux/err.h>
#include <linux/irqflags.h>
#include <linux/context_tracking.h>
#include <linux/irqbypass.h>
#include <linux/rcuwait.h>
#include <linux/refcount.h>
#include <linux/nospec.h>
#include <asm/signal.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>

#include <linux/kvm_types.h>

#include <asm/kvm_host.h>
#include <linux/kvm_dirty_ring.h>

#ifndef KVM_MAX_VCPU_ID
#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
#endif

/*
 * Bits 16 ~ 31 of kvm_memory_region::flags are used internally
 * in kvm; the other bits are visible to userspace and are defined in
 * include/linux/kvm.h.
 */
#define KVM_MEMSLOT_INVALID	(1UL << 16)

/*
 * Bit 63 of the memslot generation number is an "update in-progress flag",
 * e.g. is temporarily set for the duration of install_new_memslots().
 * This flag effectively creates a unique generation number that is used to
 * mark cached memslot data, e.g. MMIO accesses, as potentially being stale,
 * i.e. may (or may not) have come from the previous memslots generation.
 *
 * This is necessary because the actual memslots update is not atomic with
 * respect to the generation number update.  Updating the generation number
 * first would allow a vCPU to cache a spte from the old memslots using the
 * new generation number, and updating the generation number after switching
 * to the new memslots would allow cache hits using the old generation number
 * to reference the defunct memslots.
 *
 * This mechanism is used to prevent getting hits in KVM's caches while a
 * memslot update is in-progress, and to prevent cache hits *after* updating
 * the actual generation number against accesses that were inserted into the
 * cache *before* the memslots were updated.
 */
#define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS	BIT_ULL(63)

/* Two fragments for cross MMIO pages. */
#define KVM_MAX_MMIO_FRAGMENTS	2

#ifndef KVM_ADDRESS_SPACE_NUM
#define KVM_ADDRESS_SPACE_NUM	1
#endif

/*
 * For a normal pfn, the highest 12 bits should be zero,
 * so we can mask bits 62 ~ 52 to indicate an error pfn,
 * and mask bit 63 to indicate the noslot pfn.
 */
#define KVM_PFN_ERR_MASK	(0x7ffULL << 52)
#define KVM_PFN_ERR_NOSLOT_MASK	(0xfffULL << 52)
#define KVM_PFN_NOSLOT		(0x1ULL << 63)

#define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
#define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
#define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)

/*
 * error pfns indicate that the gfn is in a slot but the host failed to
 * translate it to a pfn.
 */
static inline bool is_error_pfn(kvm_pfn_t pfn)
{
	return !!(pfn & KVM_PFN_ERR_MASK);
}

/*
 * error_noslot pfns indicate that the gfn can not be
 * translated to a pfn - it is either not in any slot or
 * the translation failed.
 */
static inline bool is_error_noslot_pfn(kvm_pfn_t pfn)
{
	return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK);
}

/* noslot pfn indicates that the gfn is not in any slot. */
static inline bool is_noslot_pfn(kvm_pfn_t pfn)
{
	return pfn == KVM_PFN_NOSLOT;
}

/*
 * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390)
 * provide their own defines and kvm_is_error_hva()
 */
#ifndef KVM_HVA_ERR_BAD

#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)

static inline bool kvm_is_error_hva(unsigned long addr)
{
	return addr >= PAGE_OFFSET;
}

#endif

#define KVM_ERR_PTR_BAD_PAGE	(ERR_PTR(-ENOENT))

static inline bool is_error_page(struct page *page)
{
	return IS_ERR(page);
}

#define KVM_REQUEST_MASK           GENMASK(7,0)
#define KVM_REQUEST_NO_WAKEUP      BIT(8)
#define KVM_REQUEST_WAIT           BIT(9)
/*
 * Architecture-independent vcpu->requests bit members
 * Bits 4-7 are reserved for more arch-independent bits.
 */
#define KVM_REQ_TLB_FLUSH         (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK           2
#define KVM_REQ_UNHALT            3
#define KVM_REQUEST_ARCH_BASE     8

#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
	BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
	(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
})
#define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
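/*
 * Usage sketch (not part of the original header): an architecture layers its
 * own request bits on top of KVM_REQUEST_ARCH_BASE, e.g. a hypothetical
 *
 *	#define KVM_REQ_FOO	KVM_ARCH_REQ(0)
 *	#define KVM_REQ_BAR	KVM_ARCH_REQ_FLAGS(1, KVM_REQUEST_WAIT)
 *
 * would occupy bits 8 and 9 of vcpu->requests.  The KVM_REQUEST_WAIT and
 * KVM_REQUEST_NO_WAKEUP flags live above KVM_REQUEST_MASK and only affect
 * how remote vCPUs are kicked and waited on when a request is broadcast.
 */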

#define KVM_USERSPACE_IRQ_SOURCE_ID		0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1

extern struct mutex kvm_lock;
extern struct list_head vm_list;

struct kvm_io_range {
	gpa_t addr;
	int len;
	struct kvm_io_device *dev;
};

#define NR_IOBUS_DEVS 1000

struct kvm_io_bus {
	int dev_count;
	int ioeventfd_count;
	struct kvm_io_range range[];
};

enum kvm_bus {
	KVM_MMIO_BUS,
	KVM_PIO_BUS,
	KVM_VIRTIO_CCW_NOTIFY_BUS,
	KVM_FAST_MMIO_BUS,
	KVM_NR_BUSES
};

int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
		     int len, const void *val);
int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
			    gpa_t addr, int len, const void *val, long cookie);
int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
		    int len, void *val);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
			    int len, struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
			      struct kvm_io_device *dev);
struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx,
					 gpa_t addr);

#ifdef CONFIG_KVM_ASYNC_PF
struct kvm_async_pf {
	struct work_struct work;
	struct list_head link;
	struct list_head queue;
	struct kvm_vcpu *vcpu;
	struct mm_struct *mm;
	gpa_t cr2_or_gpa;
	unsigned long addr;
	struct kvm_arch_async_pf arch;
	bool   wakeup_all;
	bool notpresent_injected;
};

void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu);
void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu);
bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
			unsigned long hva, struct kvm_arch_async_pf *arch);
int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
#endif

#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
struct kvm_gfn_range {
	struct kvm_memory_slot *slot;
	gfn_t start;
	gfn_t end;
	pte_t pte;
	bool may_block;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
#endif

enum {
	OUTSIDE_GUEST_MODE,
	IN_GUEST_MODE,
	EXITING_GUEST_MODE,
	READING_SHADOW_PAGE_TABLES,
};

#define KVM_UNMAPPED_PAGE	((void *) 0x500 + POISON_POINTER_DELTA)

struct kvm_host_map {
	/*
	 * Only valid if the 'pfn' is managed by the host kernel (i.e. there is
	 * a 'struct page' for it; when using the mem= kernel parameter, some
	 * memory can be used as guest memory but is not managed by the host
	 * kernel).
	 * If 'pfn' is not managed by the host kernel, this field is
	 * initialized to KVM_UNMAPPED_PAGE.
	 */
	struct page *page;
	void *hva;
	kvm_pfn_t pfn;
	kvm_pfn_t gfn;
};

/*
 * Used to check if the mapping is valid or not. Never use 'kvm_host_map'
 * directly to check for that.
 */
static inline bool kvm_vcpu_mapped(struct kvm_host_map *map)
{
	return !!map->hva;
}

static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop)
{
	return single_task_running() && !need_resched() && ktime_before(cur, stop);
}

/*
 * Sometimes a large or cross-page mmio needs to be broken up into separate
 * exits for userspace servicing.
 */
struct kvm_mmio_fragment {
	gpa_t gpa;
	void *data;
	unsigned len;
};

struct kvm_vcpu {
	struct kvm *kvm;
#ifdef CONFIG_PREEMPT_NOTIFIERS
	struct preempt_notifier preempt_notifier;
#endif
	int cpu;
	int vcpu_id; /* id given by userspace at creation */
	int vcpu_idx; /* index in kvm->vcpus array */
	int srcu_idx;
	int mode;
	u64 requests;
	unsigned long guest_debug;

	int pre_pcpu;
	struct list_head blocked_vcpu_list;

	struct mutex mutex;
	struct kvm_run *run;

	struct rcuwait wait;
	struct pid __rcu *pid;
	int sigset_active;
	sigset_t sigset;
	struct kvm_vcpu_stat stat;
	unsigned int halt_poll_ns;
	bool valid_wakeup;

#ifdef CONFIG_HAS_IOMEM
	int mmio_needed;
	int mmio_read_completed;
	int mmio_is_write;
	int mmio_cur_fragment;
	int mmio_nr_fragments;
	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
#endif

#ifdef CONFIG_KVM_ASYNC_PF
	struct {
		u32 queued;
		struct list_head queue;
		struct list_head done;
		spinlock_t lock;
	} async_pf;
#endif

#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
	/*
	 * Cpu relax intercept or pause loop exit optimization
	 * in_spin_loop: set when a vcpu does a pause loop exit
	 *  or cpu relax intercepted.
	 * dy_eligible: indicates whether vcpu is eligible for directed yield.
	 */
	struct {
		bool in_spin_loop;
		bool dy_eligible;
	} spin_loop;
#endif
	bool preempted;
	bool ready;
	struct kvm_vcpu_arch arch;
	struct kvm_dirty_ring dirty_ring;
};

/* must be called with irqs disabled */
static __always_inline void guest_enter_irqoff(void)
{
	/*
	 * This is running in ioctl context so it's safe to assume that it's the
	 * stime pending cputime to flush.
	 */
	instrumentation_begin();
	vtime_account_guest_enter();
	instrumentation_end();

	/*
	 * KVM does not hold any references to rcu protected data when it
	 * switches CPU into a guest mode. In fact switching to a guest mode
	 * is very similar to exiting to userspace from rcu point of view. In
	 * addition CPU may stay in a guest mode for quite a long time (up to
	 * one time slice). Let's treat guest mode as a quiescent state, just like
	 * we do with user-mode execution.
	 */
	if (!context_tracking_guest_enter()) {
		instrumentation_begin();
		rcu_virt_note_context_switch(smp_processor_id());
		instrumentation_end();
	}
}

static __always_inline void guest_exit_irqoff(void)
{
	context_tracking_guest_exit();

	instrumentation_begin();
	/* Flush the guest cputime we spent on the guest */
	vtime_account_guest_exit();
	instrumentation_end();
}

static inline void guest_exit(void)
{
	unsigned long flags;

	local_irq_save(flags);
	guest_exit_irqoff();
	local_irq_restore(flags);
}

static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
	/*
	 * The memory barrier ensures a previous write to vcpu->requests cannot
	 * be reordered with the read of vcpu->mode.  It pairs with the general
	 * memory barrier following the write of vcpu->mode in VCPU RUN.
	 */
	smp_mb__before_atomic();
	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}

/*
 * Some of the bitops functions do not support too long bitmaps.
 * This number must be chosen so as not to exceed such limits.
 */
#define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1)

struct kvm_memory_slot {
	gfn_t base_gfn;
	unsigned long npages;
	unsigned long *dirty_bitmap;
	struct kvm_arch_memory_slot arch;
	unsigned long userspace_addr;
	u32 flags;
	short id;
	u16 as_id;
};

static inline bool kvm_slot_dirty_track_enabled(struct kvm_memory_slot *slot)
{
	return slot->flags & KVM_MEM_LOG_DIRTY_PAGES;
}

static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot)
{
	return ALIGN(memslot->npages, BITS_PER_LONG) / 8;
}
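/*
 * Example (illustrative, not from the original header): a hypothetical slot
 * of 0x20000 pages (512 MiB with 4 KiB pages) needs
 * ALIGN(0x20000, BITS_PER_LONG) / 8 = 0x4000 bytes, i.e. 16 KiB of dirty
 * bitmap, one bit per guest page.
 */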

static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot)
{
	unsigned long len = kvm_dirty_bitmap_bytes(memslot);

	return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap);
}

#ifndef KVM_DIRTY_LOG_MANUAL_CAPS
#define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE
#endif

struct kvm_s390_adapter_int {
	u64 ind_addr;
	u64 summary_addr;
	u64 ind_offset;
	u32 summary_offset;
	u32 adapter_id;
};

struct kvm_hv_sint {
	u32 vcpu;
	u32 sint;
};

struct kvm_kernel_irq_routing_entry {
	u32 gsi;
	u32 type;
	int (*set)(struct kvm_kernel_irq_routing_entry *e,
		   struct kvm *kvm, int irq_source_id, int level,
		   bool line_status);
	union {
		struct {
			unsigned irqchip;
			unsigned pin;
		} irqchip;
		struct {
			u32 address_lo;
			u32 address_hi;
			u32 data;
			u32 flags;
			u32 devid;
		} msi;
		struct kvm_s390_adapter_int adapter;
		struct kvm_hv_sint hv_sint;
	};
	struct hlist_node link;
};

#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
struct kvm_irq_routing_table {
	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
	u32 nr_rt_entries;
	/*
	 * Array indexed by gsi. Each entry contains list of irq chips
	 * the gsi is connected to.
	 */
	struct hlist_head map[];
};
#endif

#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#endif

#define KVM_MEM_SLOTS_NUM SHRT_MAX
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS)

#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
{
	return 0;
}
#endif

/*
 * Note:
 * memslots are not sorted by id anymore, please use id_to_memslot()
 * to get the memslot by its id.
 */
struct kvm_memslots {
	u64 generation;
	/* The mapping table from slot id to the index in memslots[]. */
	short id_to_index[KVM_MEM_SLOTS_NUM];
	atomic_t lru_slot;
	int used_slots;
	struct kvm_memory_slot memslots[];
};

struct kvm {
#ifdef KVM_HAVE_MMU_RWLOCK
	rwlock_t mmu_lock;
#else
	spinlock_t mmu_lock;
#endif /* KVM_HAVE_MMU_RWLOCK */

	struct mutex slots_lock;
	struct mm_struct *mm; /* userspace tied to this vm */
	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];

	/*
	 * created_vcpus is protected by kvm->lock, and is incremented
	 * at the beginning of KVM_CREATE_VCPU.  online_vcpus is only
	 * incremented after storing the kvm_vcpu pointer in vcpus,
	 * and is accessed atomically.
	 */
	atomic_t online_vcpus;
	int created_vcpus;
	int last_boosted_vcpu;
	struct list_head vm_list;
	struct mutex lock;
	struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
#ifdef CONFIG_HAVE_KVM_EVENTFD
	struct {
		spinlock_t        lock;
		struct list_head  items;
		struct list_head  resampler_list;
		struct mutex      resampler_lock;
	} irqfds;
	struct list_head ioeventfds;
#endif
	struct kvm_vm_stat stat;
	struct kvm_arch arch;
	refcount_t users_count;
#ifdef CONFIG_KVM_MMIO
	struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
	spinlock_t ring_lock;
	struct list_head coalesced_zones;
#endif

	struct mutex irq_lock;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
	/*
	 * Update side is protected by irq_lock.
	 */
	struct kvm_irq_routing_table __rcu *irq_routing;
#endif
#ifdef CONFIG_HAVE_KVM_IRQFD
	struct hlist_head irq_ack_notifier_list;
#endif

#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
	struct mmu_notifier mmu_notifier;
	unsigned long mmu_notifier_seq;
	long mmu_notifier_count;
	unsigned long mmu_notifier_range_start;
	unsigned long mmu_notifier_range_end;
#endif
	long tlbs_dirty;
	struct list_head devices;
	u64 manual_dirty_log_protect;
	struct dentry *debugfs_dentry;
	struct kvm_stat_data **debugfs_stat_data;
	struct srcu_struct srcu;
	struct srcu_struct irq_srcu;
	pid_t userspace_pid;
	unsigned int max_halt_poll_ns;
	u32 dirty_ring_size;
};

#define kvm_err(fmt, ...) \
	pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_info(fmt, ...) \
	pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_debug(fmt, ...) \
	pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__)
#define kvm_debug_ratelimited(fmt, ...) \
	pr_debug_ratelimited("kvm [%i]: " fmt, task_pid_nr(current), \
			     ## __VA_ARGS__)
#define kvm_pr_unimpl(fmt, ...) \
	pr_err_ratelimited("kvm [%i]: " fmt, \
			   task_tgid_nr(current), ## __VA_ARGS__)

/* The guest did something we don't support. */
#define vcpu_unimpl(vcpu, fmt, ...)					\
	kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt,			\
			(vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__)

#define vcpu_debug(vcpu, fmt, ...)					\
	kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
#define vcpu_debug_ratelimited(vcpu, fmt, ...)				\
	kvm_debug_ratelimited("vcpu%i " fmt, (vcpu)->vcpu_id,           \
			      ## __VA_ARGS__)
#define vcpu_err(vcpu, fmt, ...)					\
	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
615

616
617
618
619
620
static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm)
{
	return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET);
}

static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
				      lockdep_is_held(&kvm->slots_lock) ||
				      !refcount_read(&kvm->users_count));
}

static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
{
	int num_vcpus = atomic_read(&kvm->online_vcpus);
	i = array_index_nospec(i, num_vcpus);

	/* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu.  */
	smp_rmb();
	return kvm->vcpus[i];
}

#define kvm_for_each_vcpu(idx, vcpup, kvm) \
	for (idx = 0; \
	     idx < atomic_read(&kvm->online_vcpus) && \
	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
	     idx++)

static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
{
	struct kvm_vcpu *vcpu = NULL;
	int i;

	if (id < 0)
		return NULL;
	if (id < KVM_MAX_VCPUS)
		vcpu = kvm_get_vcpu(kvm, id);
	if (vcpu && vcpu->vcpu_id == id)
		return vcpu;
	kvm_for_each_vcpu(i, vcpu, kvm)
		if (vcpu->vcpu_id == id)
			return vcpu;
	return NULL;
}

static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
{
	return vcpu->vcpu_idx;
}

#define kvm_for_each_memslot(memslot, slots)				\
	for (memslot = &slots->memslots[0];				\
	     memslot < slots->memslots + slots->used_slots; memslot++)	\
		if (WARN_ON_ONCE(!memslot->npages)) {			\
		} else

void kvm_vcpu_destroy(struct kvm_vcpu *vcpu);

void vcpu_load(struct kvm_vcpu *vcpu);
void vcpu_put(struct kvm_vcpu *vcpu);

#ifdef __KVM_HAVE_IOAPIC
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
void kvm_arch_post_irq_routing_update(struct kvm *kvm);
#else
static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
}
static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
{
}
#endif

#ifdef CONFIG_HAVE_KVM_IRQFD
int kvm_irqfd_init(void);
void kvm_irqfd_exit(void);
#else
static inline int kvm_irqfd_init(void)
{
	return 0;
}

static inline void kvm_irqfd_exit(void)
{
}
#endif
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
		  struct module *module);
void kvm_exit(void);

void kvm_get_kvm(struct kvm *kvm);
void kvm_put_kvm(struct kvm *kvm);
bool file_is_kvm(struct file *file);
void kvm_put_kvm_no_destroy(struct kvm *kvm);

static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
	as_id = array_index_nospec(as_id, KVM_ADDRESS_SPACE_NUM);
	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
			lockdep_is_held(&kvm->slots_lock) ||
			!refcount_read(&kvm->users_count));
}

static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
{
	return __kvm_memslots(kvm, 0);
}

static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
{
	int as_id = kvm_arch_vcpu_memslots_id(vcpu);

	return __kvm_memslots(vcpu->kvm, as_id);
}

static inline
struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id)
{
	int index = slots->id_to_index[id];
	struct kvm_memory_slot *slot;

	if (index < 0)
		return NULL;

	slot = &slots->memslots[index];

	WARN_ON(slot->id != id);
	return slot;
}

/*
 * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations:
 * - create a new memory slot
 * - delete an existing memory slot
 * - modify an existing memory slot
 *   -- move it in the guest physical memory space
 *   -- just change its flags
 *
 * Since flags can be changed by some of these operations, the following
 * differentiation is the best we can do for __kvm_set_memory_region():
 */
enum kvm_mr_change {
	KVM_MR_CREATE,
	KVM_MR_DELETE,
	KVM_MR_MOVE,
	KVM_MR_FLAGS_ONLY,
};
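/*
 * Userspace-side sketch (illustrative, assumes the UAPI definitions from
 * include/uapi/linux/kvm.h): a KVM_MR_CREATE typically results from an
 * ioctl such as
 *
 *	struct kvm_userspace_memory_region region = {
 *		.slot = 0,
 *		.flags = KVM_MEM_LOG_DIRTY_PAGES,
 *		.guest_phys_addr = 0x100000,
 *		.memory_size = 0x200000,
 *		.userspace_addr = (__u64)backing_mem,	// hypothetical mmap'd buffer
 *	};
 *	ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
 *
 * deleting a slot is the same call with memory_size = 0, and KVM_MR_MOVE /
 * KVM_MR_FLAGS_ONLY result from changing guest_phys_addr or flags of an
 * existing slot.
 */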

int kvm_set_memory_region(struct kvm *kvm,
			  const struct kvm_userspace_memory_region *mem);
int __kvm_set_memory_region(struct kvm *kvm,
			    const struct kvm_userspace_memory_region *mem);
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
				struct kvm_memory_slot *memslot,
				const struct kvm_userspace_memory_region *mem,
				enum kvm_mr_change change);
void kvm_arch_commit_memory_region(struct kvm *kvm,
				const struct kvm_userspace_memory_region *mem,
				struct kvm_memory_slot *old,
				const struct kvm_memory_slot *new,
				enum kvm_mr_change change);
/* flush all memory translations */
void kvm_arch_flush_shadow_all(struct kvm *kvm);
/* flush memory translations pointing to 'slot' */
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
				   struct kvm_memory_slot *slot);

int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn,
			    struct page **pages, int nr_pages);

struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn,
				      bool *writable);
void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);

kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
		      bool *writable);
kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
			       bool atomic, bool *async, bool write_fault,
			       bool *writable, hva_t *hva);

void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_dirty(kvm_pfn_t pfn);
void kvm_set_pfn_accessed(kvm_pfn_t pfn);
void kvm_get_pfn(kvm_pfn_t pfn);

void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
			int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
				 void *data, unsigned int offset,
				 unsigned long len);
int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
			 int offset, int len);
int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
		    unsigned long len);
int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			   void *data, unsigned long len);
int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
				  void *data, unsigned int offset,
				  unsigned long len);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
			      gpa_t gpa, unsigned long len);
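/*
 * Usage sketch (illustrative): the cached variants avoid a memslot lookup on
 * every access.  A caller typically initializes the cache once and then
 * reads/writes through it, e.g. with hypothetical variables:
 *
 *	struct gfn_to_hva_cache ghc;
 *	u64 val = 0;
 *
 *	if (!kvm_gfn_to_hva_cache_init(kvm, &ghc, gpa, sizeof(val)))
 *		kvm_write_guest_cached(kvm, &ghc, &val, sizeof(val));
 *
 * The cache is re-initialized whenever the gpa or length changes.
 */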

#define __kvm_get_guest(kvm, gfn, offset, v)				\
({									\
	unsigned long __addr = gfn_to_hva(kvm, gfn);			\
	typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset);	\
	int __ret = -EFAULT;						\
									\
	if (!kvm_is_error_hva(__addr))					\
		__ret = get_user(v, __uaddr);				\
	__ret;								\
})

#define kvm_get_guest(kvm, gpa, v)					\
({									\
	gpa_t __gpa = gpa;						\
	struct kvm *__kvm = kvm;					\
									\
	__kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT,			\
			offset_in_page(__gpa), v);			\
})

#define __kvm_put_guest(kvm, gfn, offset, v)				\
({									\
	unsigned long __addr = gfn_to_hva(kvm, gfn);			\
	typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset);	\
	int __ret = -EFAULT;						\
									\
	if (!kvm_is_error_hva(__addr))					\
		__ret = put_user(v, __uaddr);				\
	if (!__ret)							\
		mark_page_dirty(kvm, gfn);				\
	__ret;								\
})

#define kvm_put_guest(kvm, gpa, v)					\
({									\
	gpa_t __gpa = gpa;						\
	struct kvm *__kvm = kvm;					\
									\
	__kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT,			\
			offset_in_page(__gpa), v);			\
})
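/*
 * Usage sketch (illustrative): kvm_get_guest()/kvm_put_guest() copy a single
 * scalar at a guest physical address, e.g. with a hypothetical gpa:
 *
 *	u32 v;
 *
 *	if (!kvm_get_guest(kvm, gpa, v))
 *		... use v ...
 *
 * Both return 0 on success and -EFAULT if the gpa is not backed by a
 * memslot; kvm_put_guest() additionally marks the page dirty on success.
 */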

int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn);
void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);

struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
		struct gfn_to_pfn_cache *cache, bool atomic);
struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
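/*
 * Usage sketch (illustrative): kvm_vcpu_map() resolves a gpa to a pfn and a
 * host virtual address, filling in a struct kvm_host_map, e.g.:
 *
 *	struct kvm_host_map map;
 *
 *	if (!kvm_vcpu_map(vcpu, gpa, &map)) {
 *		... access map.hva ...
 *		kvm_vcpu_unmap(vcpu, &map, true);
 *	}
 *
 * The 'dirty' argument of kvm_vcpu_unmap() tells KVM whether the page was
 * written and therefore must be marked dirty.
 */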
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
			     int len);
int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
			       unsigned long len);
int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data,
			unsigned long len);
int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data,
			      int offset, int len);
int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
			 unsigned long len);
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);

void kvm_sigset_activate(struct kvm_vcpu *vcpu);
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);

void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
int kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);

void kvm_flush_remote_tlbs(struct kvm *kvm);
void kvm_reload_remote_mmus(struct kvm *kvm);

#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc);
void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
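/*
 * Usage sketch (illustrative): architectures that opt in via
 * KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE typically top up the cache while it is
 * still legal to sleep, then allocate from it with mmu_lock held (where
 * sleeping is not allowed), e.g. with a hypothetical per-vCPU cache field:
 *
 *	r = kvm_mmu_topup_memory_cache(&vcpu->arch.some_cache, min);
 *	if (r)
 *		return r;
 *	...
 *	obj = kvm_mmu_memory_cache_alloc(&vcpu->arch.some_cache);
 */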

bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
				 struct kvm_vcpu *except,
				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
				      struct kvm_vcpu *except);
bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
				unsigned long *vcpu_bitmap);

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg);
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg);
vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf);

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext);

void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
					struct kvm_memory_slot *slot,
					gfn_t gfn_offset,
					unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);

#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
					const struct kvm_memory_slot *memslot);
#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
		      int *is_dirty, struct kvm_memory_slot **memslot);
#endif

int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
			bool line_status);
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
			    struct kvm_enable_cap *cap);
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg);

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
				    struct kvm_translation *tr);