/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

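/*
 * Workqueue handler that runs when the irqfd's eventfd has been signaled.
 * For an ordinary irqfd the GSI is pulsed (asserted then de-asserted); for
 * a resampler irqfd it is only asserted here, and irqfd_resampler_ack()
 * de-asserts it once the guest acknowledges the interrupt.
 */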
static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	u64 cnt;

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return list_empty(&irqfd->list) ? false : true;
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	unsigned long flags = (unsigned long)key;
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & POLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & POLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

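/*
 * Poll-table callback used during f_op->poll(): hook our wait-queue entry
 * into the eventfd's wait queue so that irqfd_wakeup() is called whenever
 * the eventfd is signaled.
 */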
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

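/*
 * Wire an eventfd to a GSI: take references on the eventfd (and on the
 * resample eventfd, if KVM_IRQFD_FLAG_RESAMPLE was requested), hook into
 * the eventfd's wait queue and publish the irqfd on kvm->irqfds.items.
 */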
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	unsigned int events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);
	srcu_read_unlock(&kvm->irq_srcu, idx);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = f.file->f_op->poll(f.file, &irqfd->pt);

	if (events & POLLIN)
		schedule_work(&irqfd->inject);

	/*
	 * do not drop the file until the irqfd is fully initialized; otherwise
	 * we might race against the POLLHUP
	 */
	fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

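/* Return true if an ack notifier is registered for the GSI behind this pin. */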
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_vcpu_request_scan_ioapic(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				    struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_vcpu_request_scan_ioapic(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

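/*
 * Entry point for the KVM_IRQFD ioctl: validate the flags and dispatch to
 * the assign or deassign path.
 *
 * A minimal userspace usage sketch, for illustration only; evfd is assumed
 * to be an eventfd(2) descriptor and the GSI value is made up:
 *
 *	struct kvm_irqfd data = { .fd = evfd, .gsi = 10 };
 *	ioctl(vm_fd, KVM_IRQFD, &data);
 *
 * Signaling evfd afterwards injects GSI 10 into the guest without a
 * heavyweight exit to userspace.
 */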
int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shutdown all
 * irqfds that still remain open
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
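
/*
 * A minimal userspace usage sketch, for illustration only; vm_fd and evfd
 * (an eventfd(2) descriptor) are assumed, and the port number is made up:
 *
 *	struct kvm_ioeventfd io = {
 *		.addr  = 0xc000,
 *		.len   = 2,
 *		.fd    = evfd,
 *		.flags = KVM_IOEVENTFD_FLAG_PIO,
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 *
 * Every 2-byte guest write to port 0xc000 then signals evfd instead of
 * causing an exit to userspace.
 */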

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

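/*
 * Decide whether a guest write hits this ioeventfd: the address must match
 * exactly; a zero-length ioeventfd matches any access at that address;
 * otherwise the length must match too, and unless the ioeventfd is a
 * wildcard the written value must equal the registered datamatch.
 */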
static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch ? true : false;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as possible
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

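/*
 * Allocate an _ioeventfd for @args, reject duplicates and register it as an
 * I/O device on @bus_idx; kvm->slots_lock is taken internally.
 */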
static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{

	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm->buses[bus_idx]->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	int                       ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		if (kvm->buses[bus_idx])
			kvm->buses[bus_idx]->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus              bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

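/*
 * Entry point for the KVM_IOEVENTFD ioctl: dispatch to the assign or
 * deassign path based on KVM_IOEVENTFD_FLAG_DEASSIGN.
 */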
int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}