/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	},
};

static const struct {
	uuid_le guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID	},
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;


	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {

		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const uuid_le *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework versions we can support
 * @fw_vercnt: Number of entries in @fw_version
 * @srv_version: The service versions we can support
 * @srv_vercnt: Number of entries in @srv_version
 * @nego_fw_version: On return, the negotiated framework version
 * @nego_srv_version: On return, the negotiated service version
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version and fw_vercnt specify the framework versions that
 * we can support.
 *
 * The srv_version and srv_vercnt specify the service versions we can
 * support.
 *
 * Versions are given in decreasing order.
 *
 * nego_fw_version and nego_srv_version store the selected protocol versions.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				u8 *buf, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
			(j < negop->icframe_vercnt + negop->icmsg_vercnt);
			j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
				(negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
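
/*
 * Usage sketch (illustrative only, not part of this file): a utility
 * driver's channel callback typically answers ICMSGTYPE_NEGOTIATE with the
 * helper above before handling any service messages. The version tables
 * below are made-up values; each entry is encoded as (major << 16) | minor
 * and listed in decreasing order of preference.
 *
 *	static const int fw_versions[] = { (3 << 16) | 0, (1 << 16) | 0 };
 *	static const int srv_versions[] = { (3 << 16) | 0, (1 << 16) | 0 };
 *	int fw, srv;
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, buf,
 *					  fw_versions, ARRAY_SIZE(fw_versions),
 *					  srv_versions, ARRAY_SIZE(srv_versions),
 *					  &fw, &srv);
 */
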
/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);

	kfree_rcu(channel, rcu);
}

static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	struct hv_per_cpu_context *hv_cpu
		= this_cpu_ptr(hv_context.cpu_context);

	list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
}

static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del_rcu(&channel->percpu_list);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
		       true);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	BUG_ON(!channel->rescind);
	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		list_del(&channel->listentry);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * We need to free the bit for init_vp_index() to work in the case
	 * of sub-channel, when we reload drivers like hv_netvsc.
	 */
	if (channel->affinity_policy == HV_LOCALIZED)
		cpumask_clear_cpu(channel->target_cpu,
				  &primary_channel->alloced_cpus_in_node);

	vmbus_release_relid(relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;
	int ret;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(newchannel);

	init_vp_index(newchannel, dev_type);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add()
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_device_register(newchannel->device_obj);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

	vmbus_release_relid(newchannel->offermsg.child_relid);

err_free_chan:
	free_channel(newchannel);
}
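
/*
 * Illustration (hypothetical driver code, not part of this file): a driver
 * that wants sub-channels registers a creation callback on its primary
 * channel. Each sub-channel offer then flows through vmbus_process_offer()
 * above and, once the new channel reaches CHANNEL_OPEN_STATE, is handed to
 * that callback:
 *
 *	static void my_sc_create(struct vmbus_channel *new_sc)
 *	{
 *		// open new_sc and set up its ring buffer, e.g. via vmbus_open()
 *	}
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_create);
 */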

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * Based on the channel affinity policy, we will assign the NUMA
	 * nodes.
	 */
	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids) {
				next_node = next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	if (primary->affinity_policy == HV_LOCALIZED) {
		/*
		 * Normally Hyper-V host doesn't create more subchannels
		 * than there are VCPUs on the node but it is possible when not
		 * all present VCPUs on the node are initialized by guest.
		 * Clear the alloced_cpus_in_node to start over.
		 */
		if (cpumask_equal(&primary->alloced_cpus_in_node,
				  cpumask_of_node(primary->numa_node)))
			cpumask_clear(&primary->alloced_cpus_in_node);
	}

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		if (primary->affinity_policy == HV_LOCALIZED) {
			/*
			 * NOTE: in the case of sub-channel, we clear the
			 * sub-channel related bit(s) in
			 * primary->alloced_cpus_in_node in
			 * hv_process_channel_removal(), so when we
			 * reload drivers like hv_netvsc in SMP guest, here
			 * we're able to re-allocate
			 * bit from primary->alloced_cpus_in_node.
			 */
			if (!cpumask_test_cpu(cur_cpu,
					      &primary->alloced_cpus_in_node)) {
				cpumask_set_cpu(cur_cpu,
						&primary->alloced_cpus_in_node);
				cpumask_set_cpu(cur_cpu, alloced_mask);
				break;
			}
		} else {
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}
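
/*
 * Worked example (hypothetical topology, two NUMA nodes with four VCPUs
 * each): performance-critical primary channels round-robin across nodes 0
 * and 1 via next_numa_node_id. With the HV_LOCALIZED policy a sub-channel
 * inherits its primary's node and takes the next CPU not yet set in
 * alloced_cpus_in_node, keeping its interrupts local to that node; with the
 * default HV_BALANCED policy sub-channels round-robin across nodes as well.
 */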

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 */
	while (1) {
		if (completion_done(&vmbus_connection.unload_event))
			break;

		for_each_online_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			page_addr = hv_cpu->synic_message_page;
			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		mdelay(10);
	}

	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->synic_message_page;
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash and the crash can be
	 * happening in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * Setup state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should never happen, because in
		 * vmbus_process_offer() we have already invoked
		 * vmbus_release_relid() on error.
		 */
		goto out;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	vmbus_rescind_cleanup(channel);

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);
			goto out;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}

out:
	mutex_unlock(&vmbus_connection.channel_mutex);
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	mutex_lock(&vmbus_connection.channel_mutex);

	BUG_ON(!is_hvsock_channel(channel));

	channel->rescind = true;
	vmbus_device_unregister(channel->device_obj);

	mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);