dpaa_eth.c 80.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *	 notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *	 notice, this list of conditions and the following disclaimer in the
 *	 documentation and/or other materials provided with the distribution.
 *     * Neither the name of Freescale Semiconductor nor the
 *	 names of its contributors may be used to endorse or promote products
 *	 derived from this software without specific prior written permission.
 *
 * ALTERNATIVELY, this software may be distributed under the terms of the
 * GNU General Public License ("GPL") as published by the Free Software
 * Foundation, either version 2 of that License or (at your option) any
 * later version.
 *
 * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/of_platform.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include <linux/io.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/highmem.h>
#include <linux/percpu.h>
#include <linux/dma-mapping.h>
#include <linux/sort.h>
54
#include <linux/phy_fixed.h>
55
56
57
58
59
60
61
#include <soc/fsl/bman.h>
#include <soc/fsl/qman.h>
#include "fman.h"
#include "fman_port.h"
#include "mac.h"
#include "dpaa_eth.h"

Madalin Bucur's avatar
Madalin Bucur committed
62
63
64
65
66
67
/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files
 * using trace events only need to #include <trace/events/sched.h>
 */
#define CREATE_TRACE_POINTS
#include "dpaa_eth_trace.h"

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
static int debug = -1;
module_param(debug, int, 0444);
MODULE_PARM_DESC(debug, "Module/Driver verbosity level (0=none,...,16=all)");

static u16 tx_timeout = 1000;
module_param(tx_timeout, ushort, 0444);
MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");

#define FM_FD_STAT_RX_ERRORS						\
	(FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL	| \
	 FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \
	 FM_FD_ERR_EXTRACTION | FM_FD_ERR_NO_SCHEME	| \
	 FM_FD_ERR_PRS_TIMEOUT | FM_FD_ERR_PRS_ILL_INSTRUCT | \
	 FM_FD_ERR_PRS_HDR_ERR)

#define FM_FD_STAT_TX_ERRORS \
	(FM_FD_ERR_UNSUPPORTED_FORMAT | \
	 FM_FD_ERR_LENGTH | FM_FD_ERR_DMA)

#define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
			  NETIF_MSG_LINK | NETIF_MSG_IFUP | \
			  NETIF_MSG_IFDOWN)

#define DPAA_INGRESS_CS_THRESHOLD 0x10000000
/* Ingress congestion threshold on FMan ports
 * The size in bytes of the ingress tail-drop threshold on FMan ports.
 * Traffic piling up above this value will be rejected by QMan and discarded
 * by FMan.
 */

/* Size in bytes of the FQ taildrop threshold */
#define DPAA_FQ_TD 0x200000

#define DPAA_CS_THRESHOLD_1G 0x06000000
/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000
 * The size in bytes of the egress Congestion State notification threshold on
 * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a
 * tight loop (e.g. by sending UDP datagrams at "while(1) speed"),
 * and the larger the frame size, the more acute the problem.
 * So we have to find a balance between these factors:
 * - avoiding the device staying congested for a prolonged time (risking
 *   the netdev watchdog to fire - see also the tx_timeout module param);
 * - affecting performance of protocols such as TCP, which otherwise
 *   behave well under the congestion notification mechanism;
 * - preventing the Tx cores from tightly-looping (as if the congestion
 *   threshold was too low to be effective);
 * - running out of memory if the CS threshold is set too high.
 */

#define DPAA_CS_THRESHOLD_10G 0x10000000
/* The size in bytes of the egress Congestion State notification threshold on
 * 10G ports, range 0x1000 .. 0x10000000
 */

/* Largest value that the FQD's OAL field can hold */
#define FSL_QMAN_MAX_OAL	127

/* Default alignment for start of data in an Rx FD */
#define DPAA_FD_DATA_ALIGNMENT  16

128
129
130
/* The DPAA requires 256 bytes reserved and mapped for the SGT */
#define DPAA_SGT_SIZE 256

131
132
133
134
135
136
137
138
139
140
141
142
/* Values for the L3R field of the FM Parse Results
 */
/* L3 Type field: First IP Present IPv4 */
#define FM_L3_PARSE_RESULT_IPV4	0x8000
/* L3 Type field: First IP Present IPv6 */
#define FM_L3_PARSE_RESULT_IPV6	0x4000
/* Values for the L4R field of the FM Parse Results */
/* L4 Type field: UDP */
#define FM_L4_PARSE_RESULT_UDP	0x40
/* L4 Type field: TCP */
#define FM_L4_PARSE_RESULT_TCP	0x20

143
144
145
146
147
148
149
/* FD status field indicating whether the FM Parser has attempted to validate
 * the L4 csum of the frame.
 * Note that having this bit set doesn't necessarily imply that the checksum
 * is valid. One would have to check the parse results to find that out.
 */
#define FM_FD_STAT_L4CV         0x00000004

150
151
152
153
154
155
156
157
158
159
160
161
162
163
#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */

#define FSL_DPAA_BPID_INV		0xff
#define FSL_DPAA_ETH_MAX_BUF_COUNT	128
#define FSL_DPAA_ETH_REFILL_THRESHOLD	80

#define DPAA_TX_PRIV_DATA_SIZE	16
#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
#define DPAA_TIME_STAMP_SIZE 8
#define DPAA_HASH_RESULTS_SIZE 8
#define DPAA_RX_PRIV_DATA_SIZE	(u16)(DPAA_TX_PRIV_DATA_SIZE + \
					dpaa_rx_extra_headroom)

164
#define DPAA_ETH_PCD_RXQ_NUM	128
165
166
167
168
169
170
171
172
173
174

#define DPAA_ENQUEUE_RETRIES	100000

enum port_type {RX, TX};

struct fm_port_fqs {
	struct dpaa_fq *tx_defq;
	struct dpaa_fq *tx_errq;
	struct dpaa_fq *rx_defq;
	struct dpaa_fq *rx_errq;
175
	struct dpaa_fq *rx_pcdq;
176
177
178
179
180
181
182
};

/* All the dpa bps in use at any moment */
static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];

#define DPAA_BP_RAW_SIZE 4096

183
#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219

static int dpaa_max_frm;

static int dpaa_rx_extra_headroom;

#define dpaa_get_max_mtu()	\
	(dpaa_max_frm - (VLAN_ETH_HLEN + ETH_FCS_LEN))

static int dpaa_netdev_init(struct net_device *net_dev,
			    const struct net_device_ops *dpaa_ops,
			    u16 tx_timeout)
{
	struct dpaa_priv *priv = netdev_priv(net_dev);
	struct device *dev = net_dev->dev.parent;
	struct dpaa_percpu_priv *percpu_priv;
	const u8 *mac_addr;
	int i, err;

	/* Although we access another CPU's private data here
	 * we do it at initialization so it is safe
	 */
	for_each_possible_cpu(i) {
		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
		percpu_priv->net_dev = net_dev;
	}

	net_dev->netdev_ops = dpaa_ops;
	mac_addr = priv->mac_dev->addr;

	net_dev->mem_start = priv->mac_dev->res->start;
	net_dev->mem_end = priv->mac_dev->res->end;

	net_dev->min_mtu = ETH_MIN_MTU;
	net_dev->max_mtu = dpaa_get_max_mtu();

	net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
Madalin Bucur's avatar
Madalin Bucur committed
220
				 NETIF_F_LLTX | NETIF_F_RXHASH);
221
222
223
224
225
226

	net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA;
	/* The kernels enables GSO automatically, if we declare NETIF_F_SG.
	 * For conformity, we'll still declare GSO explicitly.
	 */
	net_dev->features |= NETIF_F_GSO;
227
	net_dev->features |= NETIF_F_RXCSUM;
228
229
230
231
232
233
234
235
236
237
238

	net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	/* we do not want shared skbs on TX */
	net_dev->priv_flags &= ~IFF_TX_SKB_SHARING;

	net_dev->features |= net_dev->hw_features;
	net_dev->vlan_features = net_dev->features;

	memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len);
	memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);

239
240
	net_dev->ethtool_ops = &dpaa_ethtool_ops;

241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
	net_dev->needed_headroom = priv->tx_headroom;
	net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout);

	/* start without the RUNNING flag, phylib controls it later */
	netif_carrier_off(net_dev);

	err = register_netdev(net_dev);
	if (err < 0) {
		dev_err(dev, "register_netdev() = %d\n", err);
		return err;
	}

	return 0;
}

static int dpaa_stop(struct net_device *net_dev)
{
	struct mac_device *mac_dev;
	struct dpaa_priv *priv;
	int i, err, error;

	priv = netdev_priv(net_dev);
	mac_dev = priv->mac_dev;

	netif_tx_stop_all_queues(net_dev);
	/* Allow the Fman (Tx) port to process in-flight frames before we
	 * try switching it off.
	 */
269
	msleep(200);
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285

	err = mac_dev->stop(mac_dev);
	if (err < 0)
		netif_err(priv, ifdown, net_dev, "mac_dev->stop() = %d\n",
			  err);

	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
		error = fman_port_disable(mac_dev->port[i]);
		if (error)
			err = error;
	}

	if (net_dev->phydev)
		phy_disconnect(net_dev->phydev);
	net_dev->phydev = NULL;

286
287
	msleep(200);

288
289
290
	return err;
}

291
static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
{
	struct dpaa_percpu_priv *percpu_priv;
	const struct dpaa_priv	*priv;

	priv = netdev_priv(net_dev);
	percpu_priv = this_cpu_ptr(priv->percpu_priv);

	netif_crit(priv, timer, net_dev, "Transmit timeout latency: %u ms\n",
		   jiffies_to_msecs(jiffies - dev_trans_start(net_dev)));

	percpu_priv->stats.tx_errors++;
}

/* Calculates the statistics for the given device by adding the statistics
 * collected by each CPU.
 */
308
309
static void dpaa_get_stats64(struct net_device *net_dev,
			     struct rtnl_link_stats64 *s)
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
{
	int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64);
	struct dpaa_priv *priv = netdev_priv(net_dev);
	struct dpaa_percpu_priv *percpu_priv;
	u64 *netstats = (u64 *)s;
	u64 *cpustats;
	int i, j;

	for_each_possible_cpu(i) {
		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);

		cpustats = (u64 *)&percpu_priv->stats;

		/* add stats from all CPUs */
		for (j = 0; j < numstats; j++)
			netstats[j] += cpustats[j];
	}
}

329
static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
330
			 void *type_data)
331
332
{
	struct dpaa_priv *priv = netdev_priv(net_dev);
333
	struct tc_mqprio_qopt *mqprio = type_data;
334
	u8 num_tc;
335
336
	int i;

337
	if (type != TC_SETUP_QDISC_MQPRIO)
338
		return -EOPNOTSUPP;
339

340
341
	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
	num_tc = mqprio->num_tc;
342
343

	if (num_tc == priv->num_tc)
344
345
		return 0;

346
	if (!num_tc) {
347
348
349
350
		netdev_reset_tc(net_dev);
		goto out;
	}

351
	if (num_tc > DPAA_TC_NUM) {
352
353
354
355
356
		netdev_err(net_dev, "Too many traffic classes: max %d supported.\n",
			   DPAA_TC_NUM);
		return -EINVAL;
	}

357
	netdev_set_num_tc(net_dev, num_tc);
358

359
	for (i = 0; i < num_tc; i++)
360
361
362
363
		netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM,
				    i * DPAA_TC_TXQ_NUM);

out:
364
	priv->num_tc = num_tc ? : 1;
365
366
367
368
	netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
	return 0;
}

369
370
371
static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
{
	struct dpaa_eth_data *eth_data;
Madalin Bucur's avatar
Madalin Bucur committed
372
	struct device *dpaa_dev;
373
374
375
376
	struct mac_device *mac_dev;

	dpaa_dev = &pdev->dev;
	eth_data = dpaa_dev->platform_data;
Madalin Bucur's avatar
Madalin Bucur committed
377
378
	if (!eth_data) {
		dev_err(dpaa_dev, "eth_data missing\n");
379
380
		return ERR_PTR(-ENODEV);
	}
Madalin Bucur's avatar
Madalin Bucur committed
381
	mac_dev = eth_data->mac_dev;
382
	if (!mac_dev) {
Madalin Bucur's avatar
Madalin Bucur committed
383
		dev_err(dpaa_dev, "mac_dev missing\n");
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
		return ERR_PTR(-EINVAL);
	}

	return mac_dev;
}

static int dpaa_set_mac_address(struct net_device *net_dev, void *addr)
{
	const struct dpaa_priv *priv;
	struct mac_device *mac_dev;
	struct sockaddr old_addr;
	int err;

	priv = netdev_priv(net_dev);

	memcpy(old_addr.sa_data, net_dev->dev_addr,  ETH_ALEN);

	err = eth_mac_addr(net_dev, addr);
	if (err < 0) {
		netif_err(priv, drv, net_dev, "eth_mac_addr() = %d\n", err);
		return err;
	}

	mac_dev = priv->mac_dev;

	err = mac_dev->change_addr(mac_dev->fman_mac,
				   (enet_addr_t *)net_dev->dev_addr);
	if (err < 0) {
		netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n",
			  err);
		/* reverting to previous address */
		eth_mac_addr(net_dev, &old_addr);

		return err;
	}

	return 0;
}

static void dpaa_set_rx_mode(struct net_device *net_dev)
{
	const struct dpaa_priv	*priv;
	int err;

	priv = netdev_priv(net_dev);

	if (!!(net_dev->flags & IFF_PROMISC) != priv->mac_dev->promisc) {
		priv->mac_dev->promisc = !priv->mac_dev->promisc;
		err = priv->mac_dev->set_promisc(priv->mac_dev->fman_mac,
						 priv->mac_dev->promisc);
		if (err < 0)
			netif_err(priv, drv, net_dev,
				  "mac_dev->set_promisc() = %d\n",
				  err);
	}

Radu Bulie's avatar
Radu Bulie committed
440
441
442
443
444
445
446
447
448
449
	if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
		priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
		err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
						  priv->mac_dev->allmulti);
		if (err < 0)
			netif_err(priv, drv, net_dev,
				  "mac_dev->set_allmulti() = %d\n",
				  err);
	}

450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
	err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
	if (err < 0)
		netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
			  err);
}

static struct dpaa_bp *dpaa_bpid2pool(int bpid)
{
	if (WARN_ON(bpid < 0 || bpid >= BM_MAX_NUM_OF_POOLS))
		return NULL;

	return dpaa_bp_array[bpid];
}

/* checks if this bpool is already allocated */
static bool dpaa_bpid2pool_use(int bpid)
{
	if (dpaa_bpid2pool(bpid)) {
468
		refcount_inc(&dpaa_bp_array[bpid]->refs);
469
470
471
472
473
474
475
476
477
478
		return true;
	}

	return false;
}

/* called only once per bpid by dpaa_bp_alloc_pool() */
static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
{
	dpaa_bp_array[bpid] = dpaa_bp;
479
	refcount_set(&dpaa_bp->refs, 1);
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
}

static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
{
	int err;

	if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) {
		pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n",
		       __func__);
		return -EINVAL;
	}

	/* If the pool is already specified, we only create one per bpid */
	if (dpaa_bp->bpid != FSL_DPAA_BPID_INV &&
	    dpaa_bpid2pool_use(dpaa_bp->bpid))
		return 0;

	if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) {
		dpaa_bp->pool = bman_new_pool();
		if (!dpaa_bp->pool) {
			pr_err("%s: bman_new_pool() failed\n",
			       __func__);
			return -ENODEV;
		}

		dpaa_bp->bpid = (u8)bman_get_bpid(dpaa_bp->pool);
	}

	if (dpaa_bp->seed_cb) {
		err = dpaa_bp->seed_cb(dpaa_bp);
		if (err)
			goto pool_seed_failed;
	}

	dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp);

	return 0;

pool_seed_failed:
	pr_err("%s: pool seeding failed\n", __func__);
	bman_free_pool(dpaa_bp->pool);

	return err;
}

/* remove and free all the buffers from the given buffer pool */
static void dpaa_bp_drain(struct dpaa_bp *bp)
{
	u8 num = 8;
	int ret;

	do {
		struct bm_buffer bmb[8];
		int i;

		ret = bman_acquire(bp->pool, bmb, num);
		if (ret < 0) {
			if (num == 8) {
				/* we have less than 8 buffers left;
				 * drain them one by one
				 */
				num = 1;
				ret = 1;
				continue;
			} else {
				/* Pool is fully drained */
				break;
			}
		}

		if (bp->free_buf_cb)
			for (i = 0; i < num; i++)
				bp->free_buf_cb(bp, &bmb[i]);
	} while (ret > 0);
}

static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
{
	struct dpaa_bp *bp = dpaa_bpid2pool(dpaa_bp->bpid);

	/* the mapping between bpid and dpaa_bp is done very late in the
	 * allocation procedure; if something failed before the mapping, the bp
	 * was not configured, therefore we don't need the below instructions
	 */
	if (!bp)
		return;

567
	if (!refcount_dec_and_test(&bp->refs))
568
569
570
571
572
573
574
575
576
577
578
		return;

	if (bp->free_buf_cb)
		dpaa_bp_drain(bp);

	dpaa_bp_array[bp->bpid] = NULL;
	bman_free_pool(bp->pool);
}

static void dpaa_bps_free(struct dpaa_priv *priv)
{
579
	dpaa_bp_free(priv->dpaa_bp);
580
581
582
583
}

/* Use multiple WQs for FQ assignment:
 *	- Tx Confirmation queues go to WQ1.
584
585
586
587
588
589
 *	- Rx Error and Tx Error queues go to WQ5 (giving them a better chance
 *	  to be scheduled, in case there are many more FQs in WQ6).
 *	- Rx Default goes to WQ6.
 *	- Tx queues go to different WQs depending on their priority. Equal
 *	  chunks of NR_CPUS queues go to WQ6 (lowest priority), WQ2, WQ1 and
 *	  WQ0 (highest priority).
590
591
592
593
594
 * This ensures that Tx-confirmed buffers are timely released. In particular,
 * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they
 * are greatly outnumbered by other FQs in the system, while
 * dequeue scheduling is round-robin.
 */
595
static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
596
597
598
599
600
601
602
603
{
	switch (fq->fq_type) {
	case FQ_TYPE_TX_CONFIRM:
	case FQ_TYPE_TX_CONF_MQ:
		fq->wq = 1;
		break;
	case FQ_TYPE_RX_ERROR:
	case FQ_TYPE_TX_ERROR:
604
		fq->wq = 5;
605
606
		break;
	case FQ_TYPE_RX_DEFAULT:
607
	case FQ_TYPE_RX_PCD:
608
609
		fq->wq = 6;
		break;
610
	case FQ_TYPE_TX:
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
		switch (idx / DPAA_TC_TXQ_NUM) {
		case 0:
			/* Low priority (best effort) */
			fq->wq = 6;
			break;
		case 1:
			/* Medium priority */
			fq->wq = 2;
			break;
		case 2:
			/* High priority */
			fq->wq = 1;
			break;
		case 3:
			/* Very high priority */
			fq->wq = 0;
			break;
		default:
			WARN(1, "Too many TX FQs: more than %d!\n",
			     DPAA_ETH_TXQ_NUM);
		}
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
		break;
	default:
		WARN(1, "Invalid FQ type %d for FQID %d!\n",
		     fq->fq_type, fq->fqid);
	}
}

static struct dpaa_fq *dpaa_fq_alloc(struct device *dev,
				     u32 start, u32 count,
				     struct list_head *list,
				     enum dpaa_fq_type fq_type)
{
	struct dpaa_fq *dpaa_fq;
	int i;

647
	dpaa_fq = devm_kcalloc(dev, count, sizeof(*dpaa_fq),
648
649
650
651
652
653
654
655
656
657
658
			       GFP_KERNEL);
	if (!dpaa_fq)
		return NULL;

	for (i = 0; i < count; i++) {
		dpaa_fq[i].fq_type = fq_type;
		dpaa_fq[i].fqid = start ? start + i : 0;
		list_add_tail(&dpaa_fq[i].list, list);
	}

	for (i = 0; i < count; i++)
659
		dpaa_assign_wq(dpaa_fq + i, i);
660
661
662
663
664
665
666
667

	return dpaa_fq;
}

static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
			      struct fm_port_fqs *port_fqs)
{
	struct dpaa_fq *dpaa_fq;
668
	u32 fq_base, fq_base_aligned, i;
669
670
671
672
673
674
675
676
677
678
679
680
681

	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR);
	if (!dpaa_fq)
		goto fq_alloc_failed;

	port_fqs->rx_errq = &dpaa_fq[0];

	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_DEFAULT);
	if (!dpaa_fq)
		goto fq_alloc_failed;

	port_fqs->rx_defq = &dpaa_fq[0];

682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
	/* the PCD FQIDs range needs to be aligned for correct operation */
	if (qman_alloc_fqid_range(&fq_base, 2 * DPAA_ETH_PCD_RXQ_NUM))
		goto fq_alloc_failed;

	fq_base_aligned = ALIGN(fq_base, DPAA_ETH_PCD_RXQ_NUM);

	for (i = fq_base; i < fq_base_aligned; i++)
		qman_release_fqid(i);

	for (i = fq_base_aligned + DPAA_ETH_PCD_RXQ_NUM;
	     i < (fq_base + 2 * DPAA_ETH_PCD_RXQ_NUM); i++)
		qman_release_fqid(i);

	dpaa_fq = dpaa_fq_alloc(dev, fq_base_aligned, DPAA_ETH_PCD_RXQ_NUM,
				list, FQ_TYPE_RX_PCD);
	if (!dpaa_fq)
		goto fq_alloc_failed;

	port_fqs->rx_pcdq = &dpaa_fq[0];

702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ))
		goto fq_alloc_failed;

	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR);
	if (!dpaa_fq)
		goto fq_alloc_failed;

	port_fqs->tx_errq = &dpaa_fq[0];

	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_CONFIRM);
	if (!dpaa_fq)
		goto fq_alloc_failed;

	port_fqs->tx_defq = &dpaa_fq[0];

	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX))
		goto fq_alloc_failed;

	return 0;

fq_alloc_failed:
	dev_err(dev, "dpaa_fq_alloc() failed\n");
	return -ENOMEM;
}

static u32 rx_pool_channel;
static DEFINE_SPINLOCK(rx_pool_channel_init);

static int dpaa_get_channel(void)
{
	spin_lock(&rx_pool_channel_init);
	if (!rx_pool_channel) {
		u32 pool;
		int ret;

		ret = qman_alloc_pool(&pool);

		if (!ret)
			rx_pool_channel = pool;
	}
	spin_unlock(&rx_pool_channel_init);
	if (!rx_pool_channel)
		return -ENOMEM;
	return rx_pool_channel;
}

static void dpaa_release_channel(void)
{
	qman_release_pool(rx_pool_channel);
}

753
static void dpaa_eth_add_channel(u16 channel, struct device *dev)
754
755
756
757
758
759
{
	u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel);
	const cpumask_t *cpus = qman_affine_cpus();
	struct qman_portal *portal;
	int cpu;

760
	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
761
762
		portal = qman_get_affine_portal(cpu);
		qman_p_static_dequeue_add(portal, pool);
763
		qman_start_using_portal(portal, dev);
764
765
766
767
768
769
770
771
772
773
774
775
776
777
	}
}

/* Congestion group state change notification callback.
 * Stops the device's egress queues while they are congested and
 * wakes them upon exiting congested state.
 * Also updates some CGR-related stats.
 */
static void dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr,
			   int congested)
{
	struct dpaa_priv *priv = (struct dpaa_priv *)container_of(cgr,
		struct dpaa_priv, cgr_data.cgr);

778
779
	if (congested) {
		priv->cgr_data.congestion_start_jiffies = jiffies;
780
		netif_tx_stop_all_queues(priv->net_dev);
781
782
783
784
		priv->cgr_data.cgr_congested_count++;
	} else {
		priv->cgr_data.congested_jiffies +=
			(jiffies - priv->cgr_data.congestion_start_jiffies);
785
		netif_tx_wake_all_queues(priv->net_dev);
786
	}
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
}

static int dpaa_eth_cgr_init(struct dpaa_priv *priv)
{
	struct qm_mcc_initcgr initcgr;
	u32 cs_th;
	int err;

	err = qman_alloc_cgrid(&priv->cgr_data.cgr.cgrid);
	if (err < 0) {
		if (netif_msg_drv(priv))
			pr_err("%s: Error %d allocating CGR ID\n",
			       __func__, err);
		goto out_error;
	}
	priv->cgr_data.cgr.cb = dpaa_eth_cgscn;

	/* Enable Congestion State Change Notifications and CS taildrop */
805
	memset(&initcgr, 0, sizeof(initcgr));
806
	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES);
807
808
809
810
811
812
813
814
815
816
817
818
819
	initcgr.cgr.cscn_en = QM_CGR_EN;

	/* Set different thresholds based on the MAC speed.
	 * This may turn suboptimal if the MAC is reconfigured at a speed
	 * lower than its max, e.g. if a dTSEC later negotiates a 100Mbps link.
	 * In such cases, we ought to reconfigure the threshold, too.
	 */
	if (priv->mac_dev->if_support & SUPPORTED_10000baseT_Full)
		cs_th = DPAA_CS_THRESHOLD_10G;
	else
		cs_th = DPAA_CS_THRESHOLD_1G;
	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);

820
	initcgr.we_mask |= cpu_to_be16(QM_CGR_WE_CSTD_EN);
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
	initcgr.cgr.cstd_en = QM_CGR_EN;

	err = qman_create_cgr(&priv->cgr_data.cgr, QMAN_CGR_FLAG_USE_INIT,
			      &initcgr);
	if (err < 0) {
		if (netif_msg_drv(priv))
			pr_err("%s: Error %d creating CGR with ID %d\n",
			       __func__, err, priv->cgr_data.cgr.cgrid);
		qman_release_cgrid(priv->cgr_data.cgr.cgrid);
		goto out_error;
	}
	if (netif_msg_drv(priv))
		pr_debug("Created CGR %d for netdev with hwaddr %pM on QMan channel %d\n",
			 priv->cgr_data.cgr.cgrid, priv->mac_dev->addr,
			 priv->cgr_data.cgr.chan);

out_error:
	return err;
}

static inline void dpaa_setup_ingress(const struct dpaa_priv *priv,
				      struct dpaa_fq *fq,
				      const struct qman_fq *template)
{
	fq->fq_base = *template;
	fq->net_dev = priv->net_dev;

	fq->flags = QMAN_FQ_FLAG_NO_ENQUEUE;
	fq->channel = priv->channel;
}

static inline void dpaa_setup_egress(const struct dpaa_priv *priv,
				     struct dpaa_fq *fq,
				     struct fman_port *port,
				     const struct qman_fq *template)
{
	fq->fq_base = *template;
	fq->net_dev = priv->net_dev;

	if (port) {
		fq->flags = QMAN_FQ_FLAG_TO_DCPORTAL;
		fq->channel = (u16)fman_port_get_qman_channel_id(port);
	} else {
		fq->flags = QMAN_FQ_FLAG_NO_MODIFY;
	}
}

static void dpaa_fq_setup(struct dpaa_priv *priv,
			  const struct dpaa_fq_cbs *fq_cbs,
			  struct fman_port *tx_port)
{
872
	int egress_cnt = 0, conf_cnt = 0, num_portals = 0, portal_cnt = 0, cpu;
873
	const cpumask_t *affine_cpus = qman_affine_cpus();
874
	u16 channels[NR_CPUS];
875
876
	struct dpaa_fq *fq;

877
	for_each_cpu_and(cpu, affine_cpus, cpu_online_mask)
878
879
		channels[num_portals++] = qman_affine_channel(cpu);

880
881
	if (num_portals == 0)
		dev_err(priv->net_dev->dev.parent,
882
			"No Qman software (affine) channels found\n");
883
884
885
886
887
888
889
890
891
892

	/* Initialize each FQ in the list */
	list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
		switch (fq->fq_type) {
		case FQ_TYPE_RX_DEFAULT:
			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
			break;
		case FQ_TYPE_RX_ERROR:
			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq);
			break;
893
894
895
896
897
898
		case FQ_TYPE_RX_PCD:
			if (!num_portals)
				continue;
			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
			fq->channel = channels[portal_cnt++ % num_portals];
			break;
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
		case FQ_TYPE_TX:
			dpaa_setup_egress(priv, fq, tx_port,
					  &fq_cbs->egress_ern);
			/* If we have more Tx queues than the number of cores,
			 * just ignore the extra ones.
			 */
			if (egress_cnt < DPAA_ETH_TXQ_NUM)
				priv->egress_fqs[egress_cnt++] = &fq->fq_base;
			break;
		case FQ_TYPE_TX_CONF_MQ:
			priv->conf_fqs[conf_cnt++] = &fq->fq_base;
			/* fall through */
		case FQ_TYPE_TX_CONFIRM:
			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_defq);
			break;
		case FQ_TYPE_TX_ERROR:
			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_errq);
			break;
		default:
			dev_warn(priv->net_dev->dev.parent,
				 "Unknown FQ type detected!\n");
			break;
		}
	}

	 /* Make sure all CPUs receive a corresponding Tx queue. */
	while (egress_cnt < DPAA_ETH_TXQ_NUM) {
		list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
			if (fq->fq_type != FQ_TYPE_TX)
				continue;
			priv->egress_fqs[egress_cnt++] = &fq->fq_base;
			if (egress_cnt == DPAA_ETH_TXQ_NUM)
				break;
		}
	}
}

static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv,
				   struct qman_fq *tx_fq)
{
	int i;

	for (i = 0; i < DPAA_ETH_TXQ_NUM; i++)
		if (priv->egress_fqs[i] == tx_fq)
			return i;

	return -EINVAL;
}

static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
{
	const struct dpaa_priv	*priv;
	struct qman_fq *confq = NULL;
	struct qm_mcc_initfq initfq;
	struct device *dev;
	struct qman_fq *fq;
	int queue_id;
	int err;

	priv = netdev_priv(dpaa_fq->net_dev);
	dev = dpaa_fq->net_dev->dev.parent;

	if (dpaa_fq->fqid == 0)
		dpaa_fq->flags |= QMAN_FQ_FLAG_DYNAMIC_FQID;

	dpaa_fq->init = !(dpaa_fq->flags & QMAN_FQ_FLAG_NO_MODIFY);

	err = qman_create_fq(dpaa_fq->fqid, dpaa_fq->flags, &dpaa_fq->fq_base);
	if (err) {
		dev_err(dev, "qman_create_fq() failed\n");
		return err;
	}
	fq = &dpaa_fq->fq_base;

	if (dpaa_fq->init) {
		memset(&initfq, 0, sizeof(initfq));

976
		initfq.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL);
977
		/* Note: we may get to keep an empty FQ in cache */
978
		initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_PREFERINCACHE);
979
980
981
982
983

		/* Try to reduce the number of portal interrupts for
		 * Tx Confirmation FQs.
		 */
		if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
984
			initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_AVOIDBLOCK);
985
986

		/* FQ placement */
987
		initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_DESTWQ);
988
989
990
991
992
993
994
995
996
997
998
999

		qm_fqd_set_destwq(&initfq.fqd, dpaa_fq->channel, dpaa_fq->wq);

		/* Put all egress queues in a congestion group of their own.
		 * Sensu stricto, the Tx confirmation queues are Rx FQs,
		 * rather than Tx - but they nonetheless account for the
		 * memory footprint on behalf of egress traffic. We therefore
		 * place them in the netdev's CGR, along with the Tx FQs.
		 */
		if (dpaa_fq->fq_type == FQ_TYPE_TX ||
		    dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM ||
		    dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
1000
			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID);