bond_alb.c 48.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds's avatar
Linus Torvalds committed
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/*
 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
 */

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pkt_sched.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/if_bonding.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <net/ipx.h>
#include <net/arp.h>
22
#include <net/ipv6.h>
Linus Torvalds's avatar
Linus Torvalds committed
23
#include <asm/byteorder.h>
24
25
#include <net/bonding.h>
#include <net/bond_alb.h>
Linus Torvalds's avatar
Linus Torvalds committed
26

27
/* Ethernet address 33:33:00:00:00:01 held in an ETH_ALEN + 2 byte,
 * long-aligned array; the two trailing bytes are implicitly zero.
 * NOTE(review): the extra bytes presumably let 64-bit wide MAC compare
 * helpers read past ETH_ALEN safely - confirm at the users.
 */
static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
	0x33, 0x33, 0x00, 0x00, 0x00, 0x01,
};
Linus Torvalds's avatar
Linus Torvalds committed
30
31
32
33
34
35
/* HZ divided by ALB_TIMER_TICKS_PER_SEC.
 * NOTE(review): presumably the jiffies between two ALB monitor ticks.
 */
static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;

#pragma pack(1)
struct learning_pkt {
	u8 mac_dst[ETH_ALEN];
	u8 mac_src[ETH_ALEN];
36
	__be16 type;
Linus Torvalds's avatar
Linus Torvalds committed
37
38
39
40
	u8 padding[ETH_ZLEN - ETH_HLEN];
};

struct arp_pkt {
41
42
	__be16  hw_addr_space;
	__be16  prot_addr_space;
Linus Torvalds's avatar
Linus Torvalds committed
43
44
	u8      hw_addr_len;
	u8      prot_addr_len;
45
	__be16  op_code;
Linus Torvalds's avatar
Linus Torvalds committed
46
	u8      mac_src[ETH_ALEN];	/* sender hardware address */
47
	__be32  ip_src;			/* sender IP address */
Linus Torvalds's avatar
Linus Torvalds committed
48
	u8      mac_dst[ETH_ALEN];	/* target hardware address */
49
	__be32  ip_dst;			/* target IP address */
Linus Torvalds's avatar
Linus Torvalds committed
50
51
52
};
#pragma pack()

53
54
/* View the network header of @skb as a struct arp_pkt.
 * No length or linearity check is performed here.
 */
static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
{
	unsigned char *nh = skb_network_header(skb);

	return (struct arp_pkt *)nh;
}

Linus Torvalds's avatar
Linus Torvalds committed
58
/* Forward declaration */
59
60
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
				      bool strict_match);
61
62
63
64
static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
static void rlb_src_unlink(struct bonding *bond, u32 index);
static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
			 u32 ip_dst_hash);
Linus Torvalds's avatar
Linus Torvalds committed
65

66
/* XOR-fold @hash_size bytes starting at @hash_start into a single byte.
 * Returns 0 when @hash_size is zero or negative.
 */
static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
{
	const u8 *cursor = hash_start;
	int remaining = hash_size;
	u8 folded = 0;

	while (remaining-- > 0)
		folded ^= *cursor++;

	return folded;
}

/*********************** tlb specific functions ***************************/

/* Reset one tx hash table entry to the unassigned state.
 *
 * When @save_load is non-zero the bytes counted so far are first turned
 * into a load_history value (tx_bytes / BOND_TLB_REBALANCE_INTERVAL + 1)
 * and the byte counter is cleared; otherwise both are left untouched.
 */
static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
{
	if (save_load) {
		entry->load_history =
			entry->tx_bytes / BOND_TLB_REBALANCE_INTERVAL + 1;
		entry->tx_bytes = 0;
	}

	/* detach the entry from any slave and from the per-slave list */
	entry->tx_slave = NULL;
	entry->next = TLB_NULL_INDEX;
	entry->prev = TLB_NULL_INDEX;
}

static inline void tlb_init_slave(struct slave *slave)
{
	SLAVE_TLB_INFO(slave).load = 0;
	SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
}

98
99
static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
			 int save_load)
Linus Torvalds's avatar
Linus Torvalds committed
100
101
102
103
104
105
106
{
	struct tlb_client_info *tx_hash_table;
	u32 index;

	/* clear slave from tx_hashtbl */
	tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;

107
108
109
110
111
112
113
114
	/* skip this if we've already freed the tx hash table */
	if (tx_hash_table) {
		index = SLAVE_TLB_INFO(slave).head;
		while (index != TLB_NULL_INDEX) {
			u32 next_index = tx_hash_table[index].next;
			tlb_init_table_entry(&tx_hash_table[index], save_load);
			index = next_index;
		}
Linus Torvalds's avatar
Linus Torvalds committed
115
116
117
	}

	tlb_init_slave(slave);
118
}
119

120
121
122
static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
			 int save_load)
{
123
	spin_lock_bh(&bond->mode_lock);
124
	__tlb_clear_slave(bond, slave, save_load);
125
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
126
127
128
129
130
131
132
}

/* Must be called before starting the monitor timer */
static int tlb_initialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
133
	struct tlb_client_info *new_hashtbl;
Linus Torvalds's avatar
Linus Torvalds committed
134
135
	int i;

136
	new_hashtbl = kzalloc(size, GFP_KERNEL);
137
	if (!new_hashtbl)
138
		return -ENOMEM;
139

140
	spin_lock_bh(&bond->mode_lock);
141
142

	bond_info->tx_hashtbl = new_hashtbl;
Linus Torvalds's avatar
Linus Torvalds committed
143

144
	for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
145
		tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
Linus Torvalds's avatar
Linus Torvalds committed
146

147
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
148
149
150
151
152
153
154
155
156

	return 0;
}

/* Must be called only after all slaves have been released */
static void tlb_deinitialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

157
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
158
159
160
161

	kfree(bond_info->tx_hashtbl);
	bond_info->tx_hashtbl = NULL;

162
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
163
164
}

165
166
167
168
169
170
static long long compute_gap(struct slave *slave)
{
	return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */
	       (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
}

Linus Torvalds's avatar
Linus Torvalds committed
171
172
173
static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
{
	struct slave *slave, *least_loaded;
174
	struct list_head *iter;
175
	long long max_gap;
Linus Torvalds's avatar
Linus Torvalds committed
176

177
178
	least_loaded = NULL;
	max_gap = LLONG_MIN;
Linus Torvalds's avatar
Linus Torvalds committed
179
180

	/* Find the slave with the largest gap */
181
	bond_for_each_slave_rcu(bond, slave, iter) {
182
		if (bond_slave_can_tx(slave)) {
183
184
			long long gap = compute_gap(slave);

Linus Torvalds's avatar
Linus Torvalds committed
185
186
187
188
189
190
191
192
193
194
			if (max_gap < gap) {
				least_loaded = slave;
				max_gap = gap;
			}
		}
	}

	return least_loaded;
}

195
196
/* Pick the transmit slave for tx hash bucket @hash_index and account
 * @skb_len bytes to that bucket.
 *
 * A bucket that already has a slave keeps it. Otherwise the least
 * loaded slave is taken, the bucket is pushed onto the head of that
 * slave's bucket list and the bucket's load_history is added to the
 * slave's load.
 *
 * Returns the chosen slave, or NULL when no slave is available (the
 * byte counter is then left untouched). No locking is done here;
 * tlb_choose_channel() wraps this in mode_lock.
 */
static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
						u32 skb_len)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct tlb_client_info *tbl = bond_info->tx_hashtbl;
	struct tlb_client_info *bucket = &tbl[hash_index];
	struct slave *chosen = bucket->tx_slave;
	struct tlb_slave_info *owner;
	u32 old_head;

	if (chosen)
		goto account;

	chosen = tlb_get_least_loaded_slave(bond);
	if (!chosen)
		return NULL;

	owner = &(SLAVE_TLB_INFO(chosen));
	old_head = owner->head;

	/* insert the bucket at the head of the slave's list */
	bucket->tx_slave = chosen;
	bucket->next = old_head;
	bucket->prev = TLB_NULL_INDEX;
	if (old_head != TLB_NULL_INDEX)
		tbl[old_head].prev = hash_index;
	owner->head = hash_index;

	owner->load += bucket->load_history;

account:
	bucket->tx_bytes += skb_len;

	return chosen;
}

231
232
233
234
/* Locked variant of __tlb_choose_channel().
 *
 * We don't need to disable softirq here, because tlb_choose_channel()
 * is only called by bond_alb_xmit() which already has softirq disabled.
 */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
					u32 skb_len)
{
	struct slave *chosen;

	spin_lock(&bond->mode_lock);
	chosen = __tlb_choose_channel(bond, hash_index, skb_len);
	spin_unlock(&bond->mode_lock);

	return chosen;
}

Linus Torvalds's avatar
Linus Torvalds committed
247
/*********************** rlb specific functions ***************************/
248

Linus Torvalds's avatar
Linus Torvalds committed
249
250
251
252
253
254
255
256
257
/* when an ARP REPLY is received from a client update its info
 * in the rx_hashtbl
 */
static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	u32 hash_index;

258
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
259

260
	hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
Linus Torvalds's avatar
Linus Torvalds committed
261
262
263
264
	client_info = &(bond_info->rx_hashtbl[hash_index]);

	if ((client_info->assigned) &&
	    (client_info->ip_src == arp->ip_dst) &&
265
	    (client_info->ip_dst == arp->ip_src) &&
266
	    (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {
Linus Torvalds's avatar
Linus Torvalds committed
267
		/* update the clients MAC address */
268
		ether_addr_copy(client_info->mac_dst, arp->mac_src);
Linus Torvalds's avatar
Linus Torvalds committed
269
270
271
272
		client_info->ntt = 1;
		bond_info->rx_ntt = 1;
	}

273
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
274
275
}

276
277
static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
			struct slave *slave)
Linus Torvalds's avatar
Linus Torvalds committed
278
{
279
	struct arp_pkt *arp, _arp;
Linus Torvalds's avatar
Linus Torvalds committed
280

281
	if (skb->protocol != cpu_to_be16(ETH_P_ARP))
282
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
283

284
285
	arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp);
	if (!arp)
286
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
287

288
289
290
291
292
293
294
295
296
297
298
299
	/* We received an ARP from arp->ip_src.
	 * We might have used this IP address previously (on the bonding host
	 * itself or on a system that is bridged together with the bond).
	 * However, if arp->mac_src is different than what is stored in
	 * rx_hashtbl, some other host is now using the IP and we must prevent
	 * sending out client updates with this IP address and the old MAC
	 * address.
	 * Clean up all hash table entries that have this address as ip_src but
	 * have a different mac_src.
	 */
	rlb_purge_src_ip(bond, arp);

Linus Torvalds's avatar
Linus Torvalds committed
300
301
302
	if (arp->op_code == htons(ARPOP_REPLY)) {
		/* update rx hash table for this ARP */
		rlb_update_entry_from_arp(bond, arp);
303
		slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n");
Linus Torvalds's avatar
Linus Torvalds committed
304
	}
305
306
out:
	return RX_HANDLER_ANOTHER;
Linus Torvalds's avatar
Linus Torvalds committed
307
308
}

309
310
/* Caller must hold rcu_read_lock() */
static struct slave *__rlb_next_rx_slave(struct bonding *bond)
Linus Torvalds's avatar
Linus Torvalds committed
311
312
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
313
314
315
	struct slave *before = NULL, *rx_slave = NULL, *slave;
	struct list_head *iter;
	bool found = false;
Linus Torvalds's avatar
Linus Torvalds committed
316

317
	bond_for_each_slave_rcu(bond, slave, iter) {
318
		if (!bond_slave_can_tx(slave))
319
320
321
322
323
324
			continue;
		if (!found) {
			if (!before || before->speed < slave->speed)
				before = slave;
		} else {
			if (!rx_slave || rx_slave->speed < slave->speed)
Linus Torvalds's avatar
Linus Torvalds committed
325
326
				rx_slave = slave;
		}
327
328
		if (slave == bond_info->rx_slave)
			found = true;
Linus Torvalds's avatar
Linus Torvalds committed
329
	}
330
331
332
333
334
	/* we didn't find anything after the current or we have something
	 * better before and up to the current slave
	 */
	if (!rx_slave || (before && rx_slave->speed < before->speed))
		rx_slave = before;
Linus Torvalds's avatar
Linus Torvalds committed
335

336
337
	if (rx_slave)
		bond_info->rx_slave = rx_slave;
Linus Torvalds's avatar
Linus Torvalds committed
338
339
340
341

	return rx_slave;
}

342
343
/* RTNL-context wrapper around __rlb_next_rx_slave().
 *
 * Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers
 */
static struct slave *rlb_next_rx_slave(struct bonding *bond)
{
	struct slave *next;

	ASSERT_RTNL();

	rcu_read_lock();
	next = __rlb_next_rx_slave(bond);
	rcu_read_unlock();

	return next;
}

Linus Torvalds's avatar
Linus Torvalds committed
356
357
358
/* teach the switch the mac of a disabled slave
 * on the primary for fault tolerance
 *
 * Does nothing without a current active slave. Otherwise the active
 * slave is put into promiscuous mode if it is not flagged as such yet,
 * the promiscuity timeout counter is restarted and learning packets
 * for @addr are sent on the active slave.
 *
 * Caller must hold RTNL
 */
static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
{
	struct slave *primary = rtnl_dereference(bond->curr_active_slave);

	if (!primary)
		return;

	/* flag is set only when dev_set_promiscuity() reports success (0) */
	if (!bond->alb_info.primary_is_promisc)
		bond->alb_info.primary_is_promisc =
			!dev_set_promiscuity(primary->dev, 1);

	bond->alb_info.rlb_promisc_timeout_counter = 0;

	alb_send_learning_packets(primary, addr, true);
}

/* slave being removed should not be active at this point
 *
382
 * Caller must hold rtnl.
Linus Torvalds's avatar
Linus Torvalds committed
383
384
385
386
387
388
389
390
 */
static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *rx_hash_table;
	u32 index, next_index;

	/* clear slave from rx_hashtbl */
391
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
392
393

	rx_hash_table = bond_info->rx_hashtbl;
394
	index = bond_info->rx_hashtbl_used_head;
Linus Torvalds's avatar
Linus Torvalds committed
395
	for (; index != RLB_NULL_INDEX; index = next_index) {
396
		next_index = rx_hash_table[index].used_next;
Linus Torvalds's avatar
Linus Torvalds committed
397
398
399
400
401
		if (rx_hash_table[index].slave == slave) {
			struct slave *assigned_slave = rlb_next_rx_slave(bond);

			if (assigned_slave) {
				rx_hash_table[index].slave = assigned_slave;
402
				if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) {
Linus Torvalds's avatar
Linus Torvalds committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
					bond_info->rx_hashtbl[index].ntt = 1;
					bond_info->rx_ntt = 1;
					/* A slave has been removed from the
					 * table because it is either disabled
					 * or being released. We must retry the
					 * update to avoid clients from not
					 * being updated & disconnecting when
					 * there is stress
					 */
					bond_info->rlb_update_retry_counter =
						RLB_UPDATE_RETRY;
				}
			} else {  /* there is no active slave */
				rx_hash_table[index].slave = NULL;
			}
		}
	}

421
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
422

423
	if (slave != rtnl_dereference(bond->curr_active_slave))
Linus Torvalds's avatar
Linus Torvalds committed
424
425
426
427
428
429
430
		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
}

static void rlb_update_client(struct rlb_client_info *client_info)
{
	int i;

431
	if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
Linus Torvalds's avatar
Linus Torvalds committed
432
433
434
435
436
437
438
439
440
441
442
443
444
		return;

	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
		struct sk_buff *skb;

		skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
				 client_info->ip_dst,
				 client_info->slave->dev,
				 client_info->ip_src,
				 client_info->mac_dst,
				 client_info->slave->dev->dev_addr,
				 client_info->mac_dst);
		if (!skb) {
445
446
447
			slave_err(client_info->slave->bond->dev,
				  client_info->slave->dev,
				  "failed to create an ARP packet\n");
Linus Torvalds's avatar
Linus Torvalds committed
448
449
450
451
452
			continue;
		}

		skb->dev = client_info->slave->dev;

453
		if (client_info->vlan_id) {
Jiri Pirko's avatar
Jiri Pirko committed
454
455
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       client_info->vlan_id);
Linus Torvalds's avatar
Linus Torvalds committed
456
457
458
459
460
461
462
463
464
465
466
467
468
		}

		arp_xmit(skb);
	}
}

/* sends ARP REPLIES that update the clients that need updating */
static void rlb_update_rx_clients(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	u32 hash_index;

469
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
470

471
472
473
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
Linus Torvalds's avatar
Linus Torvalds committed
474
475
476
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		if (client_info->ntt) {
			rlb_update_client(client_info);
477
			if (bond_info->rlb_update_retry_counter == 0)
Linus Torvalds's avatar
Linus Torvalds committed
478
479
480
481
				client_info->ntt = 0;
		}
	}

482
	/* do not update the entries again until this counter is zero so that
Linus Torvalds's avatar
Linus Torvalds committed
483
484
485
486
	 * not to confuse the clients.
	 */
	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;

487
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
488
489
490
491
492
493
494
495
496
497
}

/* The slave was assigned a new mac address - update the clients */
static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	int ntt = 0;
	u32 hash_index;

498
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
499

500
501
502
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
Linus Torvalds's avatar
Linus Torvalds committed
503
504
505
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if ((client_info->slave == slave) &&
506
		    is_valid_ether_addr(client_info->mac_dst)) {
Linus Torvalds's avatar
Linus Torvalds committed
507
508
509
510
511
			client_info->ntt = 1;
			ntt = 1;
		}
	}

512
	/* update the team's flag only after the whole iteration */
Linus Torvalds's avatar
Linus Torvalds committed
513
514
	if (ntt) {
		bond_info->rx_ntt = 1;
515
		/* fasten the change */
Linus Torvalds's avatar
Linus Torvalds committed
516
517
518
		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
	}

519
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
520
521
522
}

/* mark all clients using src_ip to be updated */
523
static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
Linus Torvalds's avatar
Linus Torvalds committed
524
525
526
527
528
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	u32 hash_index;

529
	spin_lock(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
530

531
532
533
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
Linus Torvalds's avatar
Linus Torvalds committed
534
535
536
		client_info = &(bond_info->rx_hashtbl[hash_index]);

		if (!client_info->slave) {
537
			netdev_err(bond->dev, "found a client with no channel in the client's hash table\n");
Linus Torvalds's avatar
Linus Torvalds committed
538
539
			continue;
		}
540
		/* update all clients using this src_ip, that are not assigned
Linus Torvalds's avatar
Linus Torvalds committed
541
542
543
544
		 * to the team's address (curr_active_slave) and have a known
		 * unicast mac address.
		 */
		if ((client_info->ip_src == src_ip) &&
545
546
		    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr,
					     bond->dev->dev_addr) &&
547
		    is_valid_ether_addr(client_info->mac_dst)) {
Linus Torvalds's avatar
Linus Torvalds committed
548
549
550
551
552
			client_info->ntt = 1;
			bond_info->rx_ntt = 1;
		}
	}

553
	spin_unlock(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
554
555
556
557
558
}

static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
559
	struct arp_pkt *arp = arp_pkt(skb);
560
	struct slave *assigned_slave, *curr_active_slave;
Linus Torvalds's avatar
Linus Torvalds committed
561
562
563
	struct rlb_client_info *client_info;
	u32 hash_index = 0;

564
	spin_lock(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
565

566
567
	curr_active_slave = rcu_dereference(bond->curr_active_slave);

568
	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
Linus Torvalds's avatar
Linus Torvalds committed
569
570
571
572
573
574
	client_info = &(bond_info->rx_hashtbl[hash_index]);

	if (client_info->assigned) {
		if ((client_info->ip_src == arp->ip_src) &&
		    (client_info->ip_dst == arp->ip_dst)) {
			/* the entry is already assigned to this client */
575
			if (!is_broadcast_ether_addr(arp->mac_dst)) {
Linus Torvalds's avatar
Linus Torvalds committed
576
				/* update mac address from arp */
577
				ether_addr_copy(client_info->mac_dst, arp->mac_dst);
Linus Torvalds's avatar
Linus Torvalds committed
578
			}
579
			ether_addr_copy(client_info->mac_src, arp->mac_src);
Linus Torvalds's avatar
Linus Torvalds committed
580
581
582

			assigned_slave = client_info->slave;
			if (assigned_slave) {
583
				spin_unlock(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
584
585
586
587
588
589
590
				return assigned_slave;
			}
		} else {
			/* the entry is already assigned to some other client,
			 * move the old client to primary (curr_active_slave) so
			 * that the new client can be assigned to this entry.
			 */
591
			if (curr_active_slave &&
592
593
			    client_info->slave != curr_active_slave) {
				client_info->slave = curr_active_slave;
Linus Torvalds's avatar
Linus Torvalds committed
594
595
596
597
598
				rlb_update_client(client_info);
			}
		}
	}
	/* assign a new slave */
599
	assigned_slave = __rlb_next_rx_slave(bond);
Linus Torvalds's avatar
Linus Torvalds committed
600
601

	if (assigned_slave) {
602
603
604
605
606
607
608
609
610
611
612
		if (!(client_info->assigned &&
		      client_info->ip_src == arp->ip_src)) {
			/* ip_src is going to be updated,
			 * fix the src hash list
			 */
			u32 hash_src = _simple_hash((u8 *)&arp->ip_src,
						    sizeof(arp->ip_src));
			rlb_src_unlink(bond, hash_index);
			rlb_src_link(bond, hash_src, hash_index);
		}

Linus Torvalds's avatar
Linus Torvalds committed
613
614
615
616
617
618
		client_info->ip_src = arp->ip_src;
		client_info->ip_dst = arp->ip_dst;
		/* arp->mac_dst is broadcast for arp reqeusts.
		 * will be updated with clients actual unicast mac address
		 * upon receiving an arp reply.
		 */
619
620
		ether_addr_copy(client_info->mac_dst, arp->mac_dst);
		ether_addr_copy(client_info->mac_src, arp->mac_src);
Linus Torvalds's avatar
Linus Torvalds committed
621
622
		client_info->slave = assigned_slave;

623
		if (is_valid_ether_addr(client_info->mac_dst)) {
Linus Torvalds's avatar
Linus Torvalds committed
624
625
626
627
628
629
			client_info->ntt = 1;
			bond->alb_info.rx_ntt = 1;
		} else {
			client_info->ntt = 0;
		}

630
		if (vlan_get_tag(skb, &client_info->vlan_id))
631
			client_info->vlan_id = 0;
Linus Torvalds's avatar
Linus Torvalds committed
632
633

		if (!client_info->assigned) {
634
635
636
			u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;
			bond_info->rx_hashtbl_used_head = hash_index;
			client_info->used_next = prev_tbl_head;
Linus Torvalds's avatar
Linus Torvalds committed
637
			if (prev_tbl_head != RLB_NULL_INDEX) {
638
				bond_info->rx_hashtbl[prev_tbl_head].used_prev =
Linus Torvalds's avatar
Linus Torvalds committed
639
640
641
642
643
644
					hash_index;
			}
			client_info->assigned = 1;
		}
	}

645
	spin_unlock(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
646
647
648
649
650
651
652
653
654
655

	return assigned_slave;
}

/* chooses (and returns) transmit channel for arp reply
 * does not choose channel for other arp types since they are
 * sent on the curr_active_slave
 */
static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
{
656
	struct arp_pkt *arp = arp_pkt(skb);
Linus Torvalds's avatar
Linus Torvalds committed
657
658
	struct slave *tx_slave = NULL;

659
660
661
	/* Don't modify or load balance ARPs that do not originate locally
	 * (e.g.,arrive via a bridge).
	 */
662
	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
663
664
		return NULL;

665
	if (arp->op_code == htons(ARPOP_REPLY)) {
666
		/* the arp must be sent on the selected rx channel */
Linus Torvalds's avatar
Linus Torvalds committed
667
		tx_slave = rlb_choose_channel(skb, bond);
668
		if (tx_slave)
669
670
			bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr,
					  tx_slave->dev->addr_len);
671
672
		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n",
			   tx_slave ? tx_slave->dev->name : "NULL");
673
	} else if (arp->op_code == htons(ARPOP_REQUEST)) {
Linus Torvalds's avatar
Linus Torvalds committed
674
675
676
677
678
		/* Create an entry in the rx_hashtbl for this client as a
		 * place holder.
		 * When the arp reply is received the entry will be updated
		 * with the correct unicast address of the client.
		 */
679
		tx_slave = rlb_choose_channel(skb, bond);
Linus Torvalds's avatar
Linus Torvalds committed
680

Peter Pan(潘卫平)'s avatar
Peter Pan(潘卫平) committed
681
		/* The ARP reply packets must be delayed so that
Linus Torvalds's avatar
Linus Torvalds committed
682
683
684
685
686
687
688
689
690
691
		 * they can cancel out the influence of the ARP request.
		 */
		bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;

		/* arp requests are broadcast and are sent on the primary
		 * the arp request will collapse all clients on the subnet to
		 * the primary slave. We must register these clients to be
		 * updated with their assigned mac.
		 */
		rlb_req_update_subnet_clients(bond, arp->ip_src);
692
693
		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n",
			   tx_slave ? tx_slave->dev->name : "NULL");
Linus Torvalds's avatar
Linus Torvalds committed
694
695
696
697
698
699
700
701
702
703
704
705
706
	}

	return tx_slave;
}

static void rlb_rebalance(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *assigned_slave;
	struct rlb_client_info *client_info;
	int ntt;
	u32 hash_index;

707
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
708
709

	ntt = 0;
710
711
712
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
Linus Torvalds's avatar
Linus Torvalds committed
713
		client_info = &(bond_info->rx_hashtbl[hash_index]);
714
		assigned_slave = __rlb_next_rx_slave(bond);
Linus Torvalds's avatar
Linus Torvalds committed
715
716
		if (assigned_slave && (client_info->slave != assigned_slave)) {
			client_info->slave = assigned_slave;
717
718
719
720
			if (!is_zero_ether_addr(client_info->mac_dst)) {
				client_info->ntt = 1;
				ntt = 1;
			}
Linus Torvalds's avatar
Linus Torvalds committed
721
722
723
724
		}
	}

	/* update the team's flag only after the whole iteration */
725
	if (ntt)
Linus Torvalds's avatar
Linus Torvalds committed
726
		bond_info->rx_ntt = 1;
727
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
728
729
}

730
/* Caller must hold mode_lock */
731
732
733
734
735
736
static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
{
	entry->used_next = RLB_NULL_INDEX;
	entry->used_prev = RLB_NULL_INDEX;
	entry->assigned = 0;
	entry->slave = NULL;
737
	entry->vlan_id = 0;
738
739
740
741
742
743
744
745
}
/* Reset the three src-list links of an rx hash table entry */
static void rlb_init_table_entry_src(struct rlb_client_info *entry)
{
	entry->src_first = RLB_NULL_INDEX;
	entry->src_prev = RLB_NULL_INDEX;
	entry->src_next = RLB_NULL_INDEX;
}

Linus Torvalds's avatar
Linus Torvalds committed
746
747
748
/* Fully initialise an rx hash table entry: zero the whole structure,
 * then set every list link to RLB_NULL_INDEX through the dst and src
 * helpers.
 */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
	memset(entry, 0, sizeof(*entry));

	rlb_init_table_entry_dst(entry);
	rlb_init_table_entry_src(entry);
}

/* Unlink rx_hashtbl[@index] from the used list, moving the list head
 * when the entry was first. The entry's own links are not modified.
 */
static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *tbl = bond_info->rx_hashtbl;
	u32 succ = tbl[index].used_next;
	u32 pred = tbl[index].used_prev;

	if (bond_info->rx_hashtbl_used_head == index)
		bond_info->rx_hashtbl_used_head = succ;

	if (pred != RLB_NULL_INDEX)
		tbl[pred].used_next = succ;

	if (succ != RLB_NULL_INDEX)
		tbl[succ].used_prev = pred;
}

/* unlink a rlb hash table entry from the src list
 *
 * The entry's src_next/src_prev are reset. Its predecessor is either
 * the list head bucket (reached through src_first) or a regular
 * member (reached through src_next); the matching link is patched.
 */
static void rlb_src_unlink(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *tbl = bond_info->rx_hashtbl;
	u32 succ = tbl[index].src_next;
	u32 pred = tbl[index].src_prev;

	tbl[index].src_next = RLB_NULL_INDEX;
	tbl[index].src_prev = RLB_NULL_INDEX;

	if (succ != RLB_NULL_INDEX)
		tbl[succ].src_prev = pred;

	if (pred != RLB_NULL_INDEX) {
		/* is pred pointing to the head of this list? */
		if (tbl[pred].src_first == index)
			tbl[pred].src_first = succ;
		else
			tbl[pred].src_next = succ;
	}
}

/* Remove rx_hashtbl[@index] from the used list and from its src list,
 * and reset its destination-side state.
 */
static void rlb_delete_table_entry(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	/* used list first: the dst reset below clears the used links */
	rlb_delete_table_entry_dst(bond, index);
	rlb_init_table_entry_dst(&(bond_info->rx_hashtbl[index]));

	rlb_src_unlink(bond, index);
}

/* add the rx_hashtbl[ip_dst_hash] entry to the list
 * of entries with identical ip_src_hash
 *
 * The entry becomes the new first element: the head bucket's src_first
 * points at it and its src_prev points back at the head bucket.
 */
static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *tbl = bond_info->rx_hashtbl;
	u32 old_first = tbl[ip_src_hash].src_first;

	tbl[ip_dst_hash].src_prev = ip_src_hash;
	tbl[ip_dst_hash].src_next = old_first;

	if (old_first != RLB_NULL_INDEX)
		tbl[old_first].src_prev = ip_dst_hash;

	tbl[ip_src_hash].src_first = ip_dst_hash;
}

818
819
820
/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
 * not match arp->mac_src
 */
821
822
823
static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
824
	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
825
826
	u32 index;

827
	spin_lock_bh(&bond->mode_lock);
828
829
830
831
832
833
834
835
836
837

	index = bond_info->rx_hashtbl[ip_src_hash].src_first;
	while (index != RLB_NULL_INDEX) {
		struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
		u32 next_index = entry->src_next;
		if (entry->ip_src == arp->ip_src &&
		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
				rlb_delete_table_entry(bond, index);
		index = next_index;
	}
838
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
839
840
841
842
843
}

static int rlb_initialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
844
	struct rlb_client_info	*new_hashtbl;
Linus Torvalds's avatar
Linus Torvalds committed
845
846
847
	int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
	int i;

848
	new_hashtbl = kmalloc(size, GFP_KERNEL);
849
	if (!new_hashtbl)
Linus Torvalds's avatar
Linus Torvalds committed
850
		return -1;
851

852
	spin_lock_bh(&bond->mode_lock);
853
854

	bond_info->rx_hashtbl = new_hashtbl;
Linus Torvalds's avatar
Linus Torvalds committed
855

856
	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
Linus Torvalds's avatar
Linus Torvalds committed
857

858
	for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
Linus Torvalds's avatar
Linus Torvalds committed
859
860
		rlb_init_table_entry(bond_info->rx_hashtbl + i);

861
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
862
863

	/* register to receive ARPs */
864
	bond->recv_probe = rlb_arp_recv;
Linus Torvalds's avatar
Linus Torvalds committed
865
866
867
868
869
870
871
872

	return 0;
}

static void rlb_deinitialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

873
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
874
875
876

	kfree(bond_info->rx_hashtbl);
	bond_info->rx_hashtbl = NULL;
877
	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
Linus Torvalds's avatar
Linus Torvalds committed
878

879
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
880
881
882
883
884
885
886
}

static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u32 curr_index;

887
	spin_lock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
888

889
	curr_index = bond_info->rx_hashtbl_used_head;
Linus Torvalds's avatar
Linus Torvalds committed
890
891
	while (curr_index != RLB_NULL_INDEX) {
		struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
892
		u32 next_index = bond_info->rx_hashtbl[curr_index].used_next;
Linus Torvalds's avatar
Linus Torvalds committed
893

894
		if (curr->vlan_id == vlan_id)
895
			rlb_delete_table_entry(bond, curr_index);
Linus Torvalds's avatar
Linus Torvalds committed
896
897
898
899

		curr_index = next_index;
	}

900
	spin_unlock_bh(&bond->mode_lock);
Linus Torvalds's avatar
Linus Torvalds committed
901
902
903
904
}

/*********************** tlb/rlb shared functions *********************/

905
static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
906
			    __be16 vlan_proto, u16 vid)
Linus Torvalds's avatar
Linus Torvalds committed
907
908
{
	struct learning_pkt pkt;
909
	struct sk_buff *skb;
Linus Torvalds's avatar
Linus Torvalds committed
910
911
912
	int size = sizeof(struct learning_pkt);

	memset(&pkt, 0, size);
913
914
	ether_addr_copy(pkt.mac_dst, mac_addr);
	ether_addr_copy(pkt.mac_src, mac_addr);
915
	pkt.type = cpu_to_be16(ETH_P_LOOPBACK);
Linus Torvalds's avatar
Linus Torvalds committed
916

917
918
919
	skb = dev_alloc_skb(size);
	if (!skb)
		return;
Linus Torvalds's avatar
Linus Torvalds committed
920

921
	skb_put_data(skb, &pkt, size);
922
923
924
925
926
927
928

	skb_reset_mac_header(skb);
	skb->network_header = skb->mac_header + ETH_HLEN;
	skb->protocol = pkt.type;
	skb->priority = TC_PRIO_CONTROL;
	skb->dev = slave->dev;

929
930
	slave_dbg(slave->bond->dev, slave->dev,
		  "Send learning packet: mac %pM vlan %d\n", mac_addr, vid);
931

Jiri Pirko's avatar
Jiri Pirko committed
932
933
	if (vid)
		__vlan_hwaccel_put_tag(skb, vlan_proto, vid);
934
935
936

	dev_queue_xmit(skb);
}
Linus Torvalds's avatar
Linus Torvalds committed
937

938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
struct alb_walk_data {
	struct bonding *bond;
	struct slave *slave;
	u8 *mac_addr;
	bool strict_match;
};

static int alb_upper_dev_walk(struct net_device *upper, void *_data)
{
	struct alb_walk_data *data = _data;
	bool strict_match = data->strict_match;
	struct bonding *bond = data->bond;
	struct slave *slave = data->slave;
	u8 *mac_addr = data->mac_addr;
	struct bond_vlan_tag *tags;

954
	if (is_vlan_dev(upper) &&
955
	    bond->dev->lower_level == upper->lower_level - 1) {
956
		if (upper->addr_assign_type == NET_ADDR_STOLEN) {
957
958
959
			alb_send_lp_vid(slave, mac_addr,
					vlan_dev_vlan_proto(upper),
					vlan_dev_vlan_id(upper));
960
		} else {
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
			alb_send_lp_vid(slave, upper->dev_addr,
					vlan_dev_vlan_proto(upper),
					vlan_dev_vlan_id(upper));
		}
	}

	/* If this is a macvlan device, then only send updates
	 * when strict_match is turned off.
	 */
	if (netif_is_macvlan(upper) && !strict_match) {
		tags = bond_verify_device_path(bond->dev, upper, 0);
		if (IS_ERR_OR_NULL(tags))
			BUG();
		alb_send_lp_vid(slave, upper->dev_addr,
				tags[0].vlan_proto, tags[0].vlan_id);
		kfree(tags);
	}

	return 0;
}

982
983
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
				      bool strict_match)
984
985
{
	struct bonding *bond = bond_get_bond_by_slave(slave);
986
987
988
989
990
991
	struct alb_walk_data data = {
		.strict_match = strict_match,
		.mac_addr = mac_addr,
		.slave = slave,
		.bond = bond,
	};
992
993

	/* send untagged */
994
	alb_send_lp_vid(slave, mac_addr, 0, 0);
995

996
997
998
	/* loop through all devices and see if we need to send a packet
	 * for that device.
	 */
999
	rcu_read_lock();
1000
	netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data);