// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/* Copyright 2014-2016 Freescale Semiconductor Inc.
 * Copyright 2016-2017 NXP
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/etherdevice.h>
#include <linux/of_net.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/kthread.h>
#include <linux/iommu.h>
#include <linux/net_tstamp.h>
#include <linux/fsl/mc.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <net/sock.h>

#include "dpaa2-eth.h"

/* CREATE_TRACE_POINTS only needs to be defined once. Other dpa files
 * using trace events only need to #include <trace/events/sched.h>
 */
#define CREATE_TRACE_POINTS
#include "dpaa2-eth-trace.h"

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Freescale Semiconductor, Inc");
MODULE_DESCRIPTION("Freescale DPAA2 Ethernet Driver");

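/* Translate an IOVA as seen by the DPAA2 hardware back into a CPU virtual
 * address, going through the IOMMU domain when one is attached.
 */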
static void *dpaa2_iova_to_virt(struct iommu_domain *domain,
				dma_addr_t iova_addr)
{
	phys_addr_t phys_addr;

	phys_addr = domain ? iommu_iova_to_phys(domain, iova_addr) : iova_addr;

	return phys_to_virt(phys_addr);
}

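/* Propagate the hardware checksum validation result, carried in the frame
 * annotation status bits, into skb->ip_summed when Rx csum offload is on.
 */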
static void validate_rx_csum(struct dpaa2_eth_priv *priv,
			     u32 fd_status,
			     struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	/* HW checksum validation is disabled, nothing to do here */
	if (!(priv->net_dev->features & NETIF_F_RXCSUM))
		return;

	/* Read checksum validation bits */
	if (!((fd_status & DPAA2_FAS_L3CV) &&
	      (fd_status & DPAA2_FAS_L4CV)))
		return;

	/* Inform the stack there's no need to compute L3/L4 csum anymore */
	skb->ip_summed = CHECKSUM_UNNECESSARY;
}

/* Free a received FD.
 * Not to be used for Tx conf FDs or on any other paths.
 */
static void free_rx_fd(struct dpaa2_eth_priv *priv,
		       const struct dpaa2_fd *fd,
		       void *vaddr)
{
	struct device *dev = priv->net_dev->dev.parent;
	dma_addr_t addr = dpaa2_fd_get_addr(fd);
	u8 fd_format = dpaa2_fd_get_format(fd);
	struct dpaa2_sg_entry *sgt;
	void *sg_vaddr;
	int i;

	/* If single buffer frame, just free the data buffer */
	if (fd_format == dpaa2_fd_single)
		goto free_buf;
	else if (fd_format != dpaa2_fd_sg)
		/* We don't support any other format */
		return;

	/* For S/G frames, we first need to free all SG entries
	 * except the first one, which was taken care of already
	 */
	sgt = vaddr + dpaa2_fd_get_offset(fd);
	for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
		addr = dpaa2_sg_get_addr(&sgt[i]);
		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
			       DMA_BIDIRECTIONAL);

		free_pages((unsigned long)sg_vaddr, 0);
		if (dpaa2_sg_is_final(&sgt[i]))
			break;
	}

free_buf:
	free_pages((unsigned long)vaddr, 0);
}

/* Build a linear skb based on a single-buffer frame descriptor */
static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch,
					const struct dpaa2_fd *fd,
					void *fd_vaddr)
{
	struct sk_buff *skb = NULL;
	u16 fd_offset = dpaa2_fd_get_offset(fd);
	u32 fd_length = dpaa2_fd_get_len(fd);

	ch->buf_count--;

	skb = build_skb(fd_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, fd_offset);
	skb_put(skb, fd_length);

	return skb;
}

/* Build a non linear (fragmented) skb based on a S/G table */
static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
				      struct dpaa2_eth_channel *ch,
				      struct dpaa2_sg_entry *sgt)
{
	struct sk_buff *skb = NULL;
	struct device *dev = priv->net_dev->dev.parent;
	void *sg_vaddr;
	dma_addr_t sg_addr;
	u16 sg_offset;
	u32 sg_length;
	struct page *page, *head_page;
	int page_offset;
	int i;

	for (i = 0; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
		struct dpaa2_sg_entry *sge = &sgt[i];

		/* NOTE: We only support SG entries in dpaa2_sg_single format,
		 * but this is the only format we may receive from HW anyway
		 */

		/* Get the address and length from the S/G entry */
		sg_addr = dpaa2_sg_get_addr(sge);
		sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
		dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
			       DMA_BIDIRECTIONAL);

		sg_length = dpaa2_sg_get_len(sge);

		if (i == 0) {
			/* We build the skb around the first data buffer */
			skb = build_skb(sg_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
			if (unlikely(!skb)) {
				/* Free the first SG entry now, since we already
				 * unmapped it and obtained the virtual address
				 */
				free_pages((unsigned long)sg_vaddr, 0);

				/* We still need to subtract the buffers used
				 * by this FD from our software counter
				 */
				while (!dpaa2_sg_is_final(&sgt[i]) &&
				       i < DPAA2_ETH_MAX_SG_ENTRIES)
					i++;
				break;
			}

			sg_offset = dpaa2_sg_get_offset(sge);
			skb_reserve(skb, sg_offset);
			skb_put(skb, sg_length);
		} else {
			/* Rest of the data buffers are stored as skb frags */
			page = virt_to_page(sg_vaddr);
			head_page = virt_to_head_page(sg_vaddr);

			/* Offset in page (which may be compound).
			 * Data in subsequent SG entries is stored from the
			 * beginning of the buffer, so we don't need to add the
			 * sg_offset.
			 */
			page_offset = ((unsigned long)sg_vaddr &
				(PAGE_SIZE - 1)) +
				(page_address(page) - page_address(head_page));

			skb_add_rx_frag(skb, i - 1, head_page, page_offset,
					sg_length, DPAA2_ETH_RX_BUF_SIZE);
		}

		if (dpaa2_sg_is_final(sge))
			break;
	}

	WARN_ONCE(i == DPAA2_ETH_MAX_SG_ENTRIES, "Final bit not set in SGT");

	/* Count all data buffers + SG table buffer */
	ch->buf_count -= i + 2;

	return skb;
}

/* Free buffers acquired from the buffer pool or which were meant to
 * be released in the pool
 */
static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
{
	struct device *dev = priv->net_dev->dev.parent;
	void *vaddr;
	int i;

	for (i = 0; i < count; i++) {
		vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
		dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
			       DMA_BIDIRECTIONAL);
		free_pages((unsigned long)vaddr, 0);
	}
}

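/* Recycle the Rx buffer of an XDP-dropped frame: buffers are batched and
 * released back to the hardware buffer pool DPAA2_ETH_BUFS_PER_CMD at a
 * time; if the release command fails they are freed instead.
 */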
static void xdp_release_buf(struct dpaa2_eth_priv *priv,
			    struct dpaa2_eth_channel *ch,
			    dma_addr_t addr)
{
	int err;

	ch->xdp.drop_bufs[ch->xdp.drop_cnt++] = addr;
	if (ch->xdp.drop_cnt < DPAA2_ETH_BUFS_PER_CMD)
		return;

	while ((err = dpaa2_io_service_release(ch->dpio, priv->bpid,
					       ch->xdp.drop_bufs,
					       ch->xdp.drop_cnt)) == -EBUSY)
		cpu_relax();

	if (err) {
		free_bufs(priv, ch->xdp.drop_bufs, ch->xdp.drop_cnt);
		ch->buf_count -= ch->xdp.drop_cnt;
	}

	ch->xdp.drop_cnt = 0;
}

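/* Enqueue a frame on behalf of XDP_TX. The buffer is marked so that hardware
 * releases it straight back into the buffer pool once transmission completes,
 * instead of sending a Tx confirmation to the driver.
 */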
static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
		       void *buf_start, u16 queue_id)
{
	struct dpaa2_eth_fq *fq;
	struct dpaa2_faead *faead;
	u32 ctrl, frc;
	int i, err;

	/* Mark the egress frame hardware annotation area as valid */
	frc = dpaa2_fd_get_frc(fd);
	dpaa2_fd_set_frc(fd, frc | DPAA2_FD_FRC_FAEADV);
	dpaa2_fd_set_ctrl(fd, DPAA2_FD_CTRL_ASAL);

	/* Instruct hardware to release the FD buffer directly into
	 * the buffer pool once transmission is completed, instead of
	 * sending a Tx confirmation frame to us
	 */
	ctrl = DPAA2_FAEAD_A4V | DPAA2_FAEAD_A2V | DPAA2_FAEAD_EBDDV;
	faead = dpaa2_get_faead(buf_start, false);
	faead->ctrl = cpu_to_le32(ctrl);
	faead->conf_fqid = 0;

	fq = &priv->fq[queue_id];
	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
		err = priv->enqueue(priv, fq, fd, 0);
		if (err != -EBUSY)
			break;
	}

	return err;
}

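/* Run the XDP program attached to this channel, if any, on a received frame
 * and carry out its verdict (PASS, TX, REDIRECT, DROP/ABORTED).
 */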
static u32 run_xdp(struct dpaa2_eth_priv *priv,
		   struct dpaa2_eth_channel *ch,
		   struct dpaa2_eth_fq *rx_fq,
		   struct dpaa2_fd *fd, void *vaddr)
{
	dma_addr_t addr = dpaa2_fd_get_addr(fd);
	struct rtnl_link_stats64 *percpu_stats;
	struct bpf_prog *xdp_prog;
	struct xdp_buff xdp;
	u32 xdp_act = XDP_PASS;
	int err;

	percpu_stats = this_cpu_ptr(priv->percpu_stats);

	rcu_read_lock();

	xdp_prog = READ_ONCE(ch->xdp.prog);
	if (!xdp_prog)
		goto out;

	xdp.data = vaddr + dpaa2_fd_get_offset(fd);
	xdp.data_end = xdp.data + dpaa2_fd_get_len(fd);
	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
	xdp_set_data_meta_invalid(&xdp);
	xdp.rxq = &ch->xdp_rxq;

	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);

	/* xdp.data pointer may have changed */
	dpaa2_fd_set_offset(fd, xdp.data - vaddr);
	dpaa2_fd_set_len(fd, xdp.data_end - xdp.data);

	switch (xdp_act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid);
		if (err) {
			xdp_release_buf(priv, ch, addr);
			percpu_stats->tx_errors++;
			ch->stats.xdp_tx_err++;
		} else {
			percpu_stats->tx_packets++;
			percpu_stats->tx_bytes += dpaa2_fd_get_len(fd);
			ch->stats.xdp_tx++;
		}
		break;
	default:
		bpf_warn_invalid_xdp_action(xdp_act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
		/* fall through */
	case XDP_DROP:
		xdp_release_buf(priv, ch, addr);
		ch->stats.xdp_drop++;
		break;
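	/* For XDP_REDIRECT, buffer ownership passes to the XDP core: unmap it
	 * and hand over the page starting at vaddr to xdp_do_redirect().
	 */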
	case XDP_REDIRECT:
		dma_unmap_page(priv->net_dev->dev.parent, addr,
			       DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
		ch->buf_count--;
		xdp.data_hard_start = vaddr;
		err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
		if (unlikely(err))
			ch->stats.xdp_drop++;
		else
			ch->stats.xdp_redirect++;
		break;
	}

	ch->xdp.res |= xdp_act;
out:
	rcu_read_unlock();
	return xdp_act;
}

/* Main Rx frame processing routine */
static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
			 struct dpaa2_eth_channel *ch,
			 const struct dpaa2_fd *fd,
			 struct dpaa2_eth_fq *fq)
{
	dma_addr_t addr = dpaa2_fd_get_addr(fd);
	u8 fd_format = dpaa2_fd_get_format(fd);
	void *vaddr;
	struct sk_buff *skb;
	struct rtnl_link_stats64 *percpu_stats;
	struct dpaa2_eth_drv_stats *percpu_extras;
	struct device *dev = priv->net_dev->dev.parent;
	struct dpaa2_fas *fas;
	void *buf_data;
	u32 status = 0;
	u32 xdp_act;

	/* Tracing point */
	trace_dpaa2_rx_fd(priv->net_dev, fd);

	vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
	dma_sync_single_for_cpu(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
				DMA_BIDIRECTIONAL);

	fas = dpaa2_get_fas(vaddr, false);
	prefetch(fas);
	buf_data = vaddr + dpaa2_fd_get_offset(fd);
	prefetch(buf_data);

	percpu_stats = this_cpu_ptr(priv->percpu_stats);
	percpu_extras = this_cpu_ptr(priv->percpu_extras);

	if (fd_format == dpaa2_fd_single) {
		xdp_act = run_xdp(priv, ch, fq, (struct dpaa2_fd *)fd, vaddr);
		if (xdp_act != XDP_PASS) {
			percpu_stats->rx_packets++;
			percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);
			return;
		}

		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
			       DMA_BIDIRECTIONAL);
		skb = build_linear_skb(ch, fd, vaddr);
	} else if (fd_format == dpaa2_fd_sg) {
		WARN_ON(priv->xdp_prog);

		dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
			       DMA_BIDIRECTIONAL);
		skb = build_frag_skb(priv, ch, buf_data);
		free_pages((unsigned long)vaddr, 0);
		percpu_extras->rx_sg_frames++;
		percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd);
	} else {
		/* We don't support any other format */
		goto err_frame_format;
	}

	if (unlikely(!skb))
		goto err_build_skb;

	prefetch(skb->data);

	/* Get the timestamp value */
	if (priv->rx_tstamp) {
		struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
		__le64 *ts = dpaa2_get_ts(vaddr, false);
		u64 ns;

		memset(shhwtstamps, 0, sizeof(*shhwtstamps));

		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
		shhwtstamps->hwtstamp = ns_to_ktime(ns);
	}

	/* Check if we need to validate the L4 csum */
	if (likely(dpaa2_fd_get_frc(fd) & DPAA2_FD_FRC_FASV)) {
		status = le32_to_cpu(fas->status);
		validate_rx_csum(priv, status, skb);
	}

	skb->protocol = eth_type_trans(skb, priv->net_dev);
	skb_record_rx_queue(skb, fq->flowid);

	percpu_stats->rx_packets++;
	percpu_stats->rx_bytes += dpaa2_fd_get_len(fd);

	list_add_tail(&skb->list, ch->rx_list);

	return;

err_build_skb:
	free_rx_fd(priv, fd, vaddr);
err_frame_format:
	percpu_stats->rx_dropped++;
}

/* Consume all frames pull-dequeued into the store. This is the simplest way to
 * make sure we don't accidentally issue another volatile dequeue which would
 * overwrite (leak) frames already in the store.
 *
 * Observance of NAPI budget is not our concern, leaving that to the caller.
 */
static int consume_frames(struct dpaa2_eth_channel *ch,
			  struct dpaa2_eth_fq **src)
{
	struct dpaa2_eth_priv *priv = ch->priv;
	struct dpaa2_eth_fq *fq = NULL;
	struct dpaa2_dq *dq;
	const struct dpaa2_fd *fd;
	int cleaned = 0;
	int is_last;

	do {
		dq = dpaa2_io_store_next(ch->store, &is_last);
		if (unlikely(!dq)) {
			/* If we're here, we *must* have placed a
			 * volatile dequeue command, so keep reading through
			 * the store until we get some sort of valid response
			 * token (either a valid frame or an "empty dequeue")
			 */
			continue;
		}

		fd = dpaa2_dq_fd(dq);
		fq = (struct dpaa2_eth_fq *)(uintptr_t)dpaa2_dq_fqd_ctx(dq);

		fq->consume(priv, ch, fd, fq);
		cleaned++;
	} while (!is_last);

	if (!cleaned)
		return 0;

	fq->stats.frames += cleaned;

	/* A dequeue operation only pulls frames from a single queue
	 * into the store. Return the frame queue as an out param.
	 */
	if (src)
		*src = fq;

	return cleaned;
}

/* Configure the egress frame annotation for timestamp update */
static void enable_tx_tstamp(struct dpaa2_fd *fd, void *buf_start)
{
	struct dpaa2_faead *faead;
	u32 ctrl, frc;

	/* Mark the egress frame annotation area as valid */
	frc = dpaa2_fd_get_frc(fd);
	dpaa2_fd_set_frc(fd, frc | DPAA2_FD_FRC_FAEADV);

	/* Set hardware annotation size */
	ctrl = dpaa2_fd_get_ctrl(fd);
	dpaa2_fd_set_ctrl(fd, ctrl | DPAA2_FD_CTRL_ASAL);

	/* enable UPD (update prepended data) bit in FAEAD field of
	 * hardware frame annotation area
	 */
	ctrl = DPAA2_FAEAD_A2V | DPAA2_FAEAD_UPDV | DPAA2_FAEAD_UPD;
	faead = dpaa2_get_faead(buf_start, true);
	faead->ctrl = cpu_to_le32(ctrl);
}

/* Create a frame descriptor based on a fragmented skb */
static int build_sg_fd(struct dpaa2_eth_priv *priv,
		       struct sk_buff *skb,
		       struct dpaa2_fd *fd)
{
	struct device *dev = priv->net_dev->dev.parent;
	void *sgt_buf = NULL;
	dma_addr_t addr;
	int nr_frags = skb_shinfo(skb)->nr_frags;
	struct dpaa2_sg_entry *sgt;
	int i, err;
	int sgt_buf_size;
	struct scatterlist *scl, *crt_scl;
	int num_sg;
	int num_dma_bufs;
	struct dpaa2_eth_swa *swa;

	/* Create and map scatterlist.
	 * We don't advertise NETIF_F_FRAGLIST, so skb_to_sgvec() will not have
	 * to go beyond nr_frags+1.
	 * Note: We don't support chained scatterlists
	 */
	if (unlikely(PAGE_SIZE / sizeof(struct scatterlist) < nr_frags + 1))
		return -EINVAL;

	scl = kcalloc(nr_frags + 1, sizeof(struct scatterlist), GFP_ATOMIC);
	if (unlikely(!scl))
		return -ENOMEM;

	sg_init_table(scl, nr_frags + 1);
	num_sg = skb_to_sgvec(skb, scl, 0, skb->len);
	num_dma_bufs = dma_map_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
	if (unlikely(!num_dma_bufs)) {
		err = -ENOMEM;
		goto dma_map_sg_failed;
	}

	/* Prepare the HW SGT structure */
	sgt_buf_size = priv->tx_data_offset +
		       sizeof(struct dpaa2_sg_entry) * num_dma_bufs;
	sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN);
	if (unlikely(!sgt_buf)) {
		err = -ENOMEM;
		goto sgt_buf_alloc_failed;
	}
	sgt_buf = PTR_ALIGN(sgt_buf, DPAA2_ETH_TX_BUF_ALIGN);
	memset(sgt_buf, 0, sgt_buf_size);

	sgt = (struct dpaa2_sg_entry *)(sgt_buf + priv->tx_data_offset);

	/* Fill in the HW SGT structure.
	 *
	 * sgt_buf is zeroed out, so the following fields are implicit
	 * in all sgt entries:
	 *   - offset is 0
	 *   - format is 'dpaa2_sg_single'
	 */
	for_each_sg(scl, crt_scl, num_dma_bufs, i) {
		dpaa2_sg_set_addr(&sgt[i], sg_dma_address(crt_scl));
		dpaa2_sg_set_len(&sgt[i], sg_dma_len(crt_scl));
	}
	dpaa2_sg_set_final(&sgt[i - 1], true);

	/* Store the skb backpointer in the SGT buffer.
	 * Fit the scatterlist and the number of buffers alongside the
	 * skb backpointer in the software annotation area. We'll need
	 * all of them on Tx Conf.
	 */
	swa = (struct dpaa2_eth_swa *)sgt_buf;
	swa->type = DPAA2_ETH_SWA_SG;
	swa->sg.skb = skb;
	swa->sg.scl = scl;
	swa->sg.num_sg = num_sg;
	swa->sg.sgt_size = sgt_buf_size;

	/* Separately map the SGT buffer */
	addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(dev, addr))) {
		err = -ENOMEM;
		goto dma_map_single_failed;
	}
	dpaa2_fd_set_offset(fd, priv->tx_data_offset);
	dpaa2_fd_set_format(fd, dpaa2_fd_sg);
	dpaa2_fd_set_addr(fd, addr);
	dpaa2_fd_set_len(fd, skb->len);
	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);

	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
		enable_tx_tstamp(fd, sgt_buf);

	return 0;

dma_map_single_failed:
	skb_free_frag(sgt_buf);
sgt_buf_alloc_failed:
	dma_unmap_sg(dev, scl, num_sg, DMA_BIDIRECTIONAL);
dma_map_sg_failed:
	kfree(scl);
	return err;
}

/* Create a frame descriptor based on a linear skb */
static int build_single_fd(struct dpaa2_eth_priv *priv,
			   struct sk_buff *skb,
			   struct dpaa2_fd *fd)
{
	struct device *dev = priv->net_dev->dev.parent;
	u8 *buffer_start, *aligned_start;
	struct dpaa2_eth_swa *swa;
	dma_addr_t addr;

	buffer_start = skb->data - dpaa2_eth_needed_headroom(priv, skb);

	/* If there's enough room to align the FD address, do it.
	 * It will help hardware optimize accesses.
	 */
	aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
				  DPAA2_ETH_TX_BUF_ALIGN);
	if (aligned_start >= skb->head)
		buffer_start = aligned_start;

	/* Store a backpointer to the skb at the beginning of the buffer
	 * (in the private data area) such that we can release it
	 * on Tx confirm
	 */
	swa = (struct dpaa2_eth_swa *)buffer_start;
	swa->type = DPAA2_ETH_SWA_SINGLE;
	swa->single.skb = skb;

	addr = dma_map_single(dev, buffer_start,
			      skb_tail_pointer(skb) - buffer_start,
			      DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(dev, addr)))
		return -ENOMEM;

	dpaa2_fd_set_addr(fd, addr);
	dpaa2_fd_set_offset(fd, (u16)(skb->data - buffer_start));
	dpaa2_fd_set_len(fd, skb->len);
	dpaa2_fd_set_format(fd, dpaa2_fd_single);
	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);

	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
		enable_tx_tstamp(fd, buffer_start);

	return 0;
}

/* FD freeing routine on the Tx path
 *
 * DMA-unmap and free FD and possibly SGT buffer allocated on Tx. The skb
 * back-pointed to is also freed.
 * This can be called either from dpaa2_eth_tx_conf() or on the error path of
 * dpaa2_eth_tx().
 */
static void free_tx_fd(const struct dpaa2_eth_priv *priv,
		       struct dpaa2_eth_fq *fq,
		       const struct dpaa2_fd *fd, bool in_napi)
{
	struct device *dev = priv->net_dev->dev.parent;
	dma_addr_t fd_addr;
	struct sk_buff *skb = NULL;
	unsigned char *buffer_start;
	struct dpaa2_eth_swa *swa;
	u8 fd_format = dpaa2_fd_get_format(fd);
	u32 fd_len = dpaa2_fd_get_len(fd);

	fd_addr = dpaa2_fd_get_addr(fd);
	buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
	swa = (struct dpaa2_eth_swa *)buffer_start;

	if (fd_format == dpaa2_fd_single) {
		if (swa->type == DPAA2_ETH_SWA_SINGLE) {
			skb = swa->single.skb;
			/* Accessing the skb buffer is safe before dma unmap,
			 * because we didn't map the actual skb shell.
			 */
			dma_unmap_single(dev, fd_addr,
					 skb_tail_pointer(skb) - buffer_start,
					 DMA_BIDIRECTIONAL);
		} else {
			WARN_ONCE(swa->type != DPAA2_ETH_SWA_XDP, "Wrong SWA type");
			dma_unmap_single(dev, fd_addr, swa->xdp.dma_size,
					 DMA_BIDIRECTIONAL);
		}
	} else if (fd_format == dpaa2_fd_sg) {
		skb = swa->sg.skb;

		/* Unmap the scatterlist */
		dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
			     DMA_BIDIRECTIONAL);
		kfree(swa->sg.scl);

		/* Unmap the SGT buffer */
		dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
				 DMA_BIDIRECTIONAL);
	} else {
		netdev_dbg(priv->net_dev, "Invalid FD format\n");
		return;
	}

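	/* Per-FQ Tx confirmation accounting, reported to
	 * netdev_tx_completed_queue() from dpaa2_eth_poll(); XDP Tx frames
	 * are not accounted here.
	 */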
	if (swa->type != DPAA2_ETH_SWA_XDP && in_napi) {
		fq->dq_frames++;
		fq->dq_bytes += fd_len;
	}

	if (swa->type == DPAA2_ETH_SWA_XDP) {
		xdp_return_frame(swa->xdp.xdpf);
		return;
	}

	/* Get the timestamp value */
	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
		struct skb_shared_hwtstamps shhwtstamps;
		__le64 *ts = dpaa2_get_ts(buffer_start, true);
		u64 ns;

		memset(&shhwtstamps, 0, sizeof(shhwtstamps));

		ns = DPAA2_PTP_CLK_PERIOD_NS * le64_to_cpup(ts);
		shhwtstamps.hwtstamp = ns_to_ktime(ns);
		skb_tstamp_tx(skb, &shhwtstamps);
	}

	/* Free SGT buffer allocated on tx */
	if (fd_format != dpaa2_fd_single)
		skb_free_frag(buffer_start);

	/* Move on with skb release */
	napi_consume_skb(skb, in_napi);
}

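/* Main Tx routine: build a frame descriptor from the skb (linear or
 * scatter-gather) and enqueue it on the queue selected by the stack.
 */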
static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
{
	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
	struct dpaa2_fd fd;
	struct rtnl_link_stats64 *percpu_stats;
	struct dpaa2_eth_drv_stats *percpu_extras;
	struct dpaa2_eth_fq *fq;
	struct netdev_queue *nq;
	u16 queue_mapping;
	unsigned int needed_headroom;
	u32 fd_len;
	u8 prio = 0;
	int err, i;

	percpu_stats = this_cpu_ptr(priv->percpu_stats);
	percpu_extras = this_cpu_ptr(priv->percpu_extras);

	needed_headroom = dpaa2_eth_needed_headroom(priv, skb);
	if (skb_headroom(skb) < needed_headroom) {
		struct sk_buff *ns;

		ns = skb_realloc_headroom(skb, needed_headroom);
		if (unlikely(!ns)) {
			percpu_stats->tx_dropped++;
			goto err_alloc_headroom;
		}
		percpu_extras->tx_reallocs++;

		if (skb->sk)
			skb_set_owner_w(ns, skb->sk);

		dev_kfree_skb(skb);
		skb = ns;
	}

	/* We'll be holding a back-reference to the skb until Tx Confirmation;
	 * we don't want that overwritten by a concurrent Tx with a cloned skb.
	 */
	skb = skb_unshare(skb, GFP_ATOMIC);
	if (unlikely(!skb)) {
		/* skb_unshare() has already freed the skb */
		percpu_stats->tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* Setup the FD fields */
	memset(&fd, 0, sizeof(fd));

	if (skb_is_nonlinear(skb)) {
		err = build_sg_fd(priv, skb, &fd);
		percpu_extras->tx_sg_frames++;
		percpu_extras->tx_sg_bytes += skb->len;
	} else {
		err = build_single_fd(priv, skb, &fd);
	}

	if (unlikely(err)) {
		percpu_stats->tx_dropped++;
		goto err_build_fd;
	}

	/* Tracing point */
	trace_dpaa2_tx_fd(net_dev, &fd);

	/* TxConf FQ selection relies on queue id from the stack.
	 * In case of a forwarded frame from another DPNI interface, we choose
	 * a queue affined to the same core that processed the Rx frame
	 */
	queue_mapping = skb_get_queue_mapping(skb);

	if (net_dev->num_tc) {
		prio = netdev_txq_to_tc(net_dev, queue_mapping);
		/* Hardware interprets priority level 0 as being the highest,
		 * so we need to do a reverse mapping to the netdev tc index
		 */
		prio = net_dev->num_tc - prio - 1;
		/* We have only one FQ array entry for all Tx hardware queues
		 * with the same flow id (but different priority levels)
		 */
		queue_mapping %= dpaa2_eth_queue_count(priv);
	}
	fq = &priv->fq[queue_mapping];

	fd_len = dpaa2_fd_get_len(&fd);
	nq = netdev_get_tx_queue(net_dev, queue_mapping);
	netdev_tx_sent_queue(nq, fd_len);

	/* Everything that happens after this enqueue might race with
	 * the Tx confirmation callback for this frame
	 */
	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
		err = priv->enqueue(priv, fq, &fd, prio);
		if (err != -EBUSY)
			break;
	}
	percpu_extras->tx_portal_busy += i;
	if (unlikely(err < 0)) {
		percpu_stats->tx_errors++;
		/* Clean up everything, including freeing the skb */
		free_tx_fd(priv, fq, &fd, false);
		netdev_tx_completed_queue(nq, 1, fd_len);
	} else {
		percpu_stats->tx_packets++;
		percpu_stats->tx_bytes += fd_len;
	}

	return NETDEV_TX_OK;

err_build_fd:
err_alloc_headroom:
	dev_kfree_skb(skb);

	return NETDEV_TX_OK;
}

/* Tx confirmation frame processing routine */
static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
			      struct dpaa2_eth_channel *ch __always_unused,
			      const struct dpaa2_fd *fd,
			      struct dpaa2_eth_fq *fq)
{
	struct rtnl_link_stats64 *percpu_stats;
	struct dpaa2_eth_drv_stats *percpu_extras;
	u32 fd_len = dpaa2_fd_get_len(fd);
	u32 fd_errors;

	/* Tracing point */
	trace_dpaa2_tx_conf_fd(priv->net_dev, fd);

	percpu_extras = this_cpu_ptr(priv->percpu_extras);
	percpu_extras->tx_conf_frames++;
	percpu_extras->tx_conf_bytes += fd_len;

	/* Check frame errors in the FD field */
	fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
	free_tx_fd(priv, fq, fd, true);

	if (likely(!fd_errors))
		return;

	if (net_ratelimit())
		netdev_dbg(priv->net_dev, "TX frame FD error: 0x%08x\n",
			   fd_errors);

	percpu_stats = this_cpu_ptr(priv->percpu_stats);
	/* Tx-conf logically pertains to the egress path. */
	percpu_stats->tx_errors++;
}

static int set_rx_csum(struct dpaa2_eth_priv *priv, bool enable)
{
	int err;

	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
			       DPNI_OFF_RX_L3_CSUM, enable);
	if (err) {
		netdev_err(priv->net_dev,
			   "dpni_set_offload(RX_L3_CSUM) failed\n");
		return err;
	}

	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
			       DPNI_OFF_RX_L4_CSUM, enable);
	if (err) {
		netdev_err(priv->net_dev,
			   "dpni_set_offload(RX_L4_CSUM) failed\n");
		return err;
	}

	return 0;
}

static int set_tx_csum(struct dpaa2_eth_priv *priv, bool enable)
{
	int err;

	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
			       DPNI_OFF_TX_L3_CSUM, enable);
	if (err) {
		netdev_err(priv->net_dev, "dpni_set_offload(TX_L3_CSUM) failed\n");
		return err;
	}

	err = dpni_set_offload(priv->mc_io, 0, priv->mc_token,
			       DPNI_OFF_TX_L4_CSUM, enable);
	if (err) {
		netdev_err(priv->net_dev, "dpni_set_offload(TX_L4_CSUM) failed\n");
		return err;
	}

	return 0;
}

/* Perform a single release command to add buffers
 * to the specified buffer pool
 */
static int add_bufs(struct dpaa2_eth_priv *priv,
		    struct dpaa2_eth_channel *ch, u16 bpid)
{
	struct device *dev = priv->net_dev->dev.parent;
	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
	struct page *page;
	dma_addr_t addr;
	int i, err;

	for (i = 0; i < DPAA2_ETH_BUFS_PER_CMD; i++) {
		/* Allocate buffer visible to WRIOP + skb shared info +
		 * alignment padding
		 */
		/* allocate one page for each Rx buffer. WRIOP sees
		 * the entire page except for a tailroom reserved for
		 * skb shared info
		 */
		page = dev_alloc_pages(0);
		if (!page)
			goto err_alloc;

		addr = dma_map_page(dev, page, 0, DPAA2_ETH_RX_BUF_SIZE,
				    DMA_BIDIRECTIONAL);
		if (unlikely(dma_mapping_error(dev, addr)))
			goto err_map;

		buf_array[i] = addr;

		/* tracing point */
		trace_dpaa2_eth_buf_seed(priv->net_dev,
					 page, DPAA2_ETH_RX_BUF_RAW_SIZE,
					 addr, DPAA2_ETH_RX_BUF_SIZE,
					 bpid);
	}

release_bufs:
	/* In case the portal is busy, retry until successful */
	while ((err = dpaa2_io_service_release(ch->dpio, bpid,
					       buf_array, i)) == -EBUSY)
		cpu_relax();

	/* If release command failed, clean up and bail out;
	 * not much else we can do about it
	 */
	if (err) {
		free_bufs(priv, buf_array, i);
		return 0;
	}

	return i;

err_map:
	__free_pages(page, 0);
err_alloc:
	/* If we managed to allocate at least some buffers,
	 * release them to hardware
	 */
	if (i)
		goto release_bufs;

	return 0;
}

static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid)
{
	int i, j;
	int new_count;

	for (j = 0; j < priv->num_channels; j++) {
		for (i = 0; i < DPAA2_ETH_NUM_BUFS;
		     i += DPAA2_ETH_BUFS_PER_CMD) {
			new_count = add_bufs(priv, priv->channel[j], bpid);
			priv->channel[j]->buf_count += new_count;

			if (new_count < DPAA2_ETH_BUFS_PER_CMD) {
				return -ENOMEM;
			}
		}
	}

	return 0;
}

/**
 * Drain the specified number of buffers from the DPNI's private buffer pool.
 * @count must not exceed DPAA2_ETH_BUFS_PER_CMD
 */
static void drain_bufs(struct dpaa2_eth_priv *priv, int count)
{
	u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
	int ret;

	do {
		ret = dpaa2_io_service_acquire(NULL, priv->bpid,
					       buf_array, count);
		if (ret < 0) {
			netdev_err(priv->net_dev, "dpaa2_io_service_acquire() failed\n");
			return;
		}
		free_bufs(priv, buf_array, ret);
	} while (ret);
}

static void drain_pool(struct dpaa2_eth_priv *priv)
{
	int i;

	drain_bufs(priv, DPAA2_ETH_BUFS_PER_CMD);
	drain_bufs(priv, 1);

	for (i = 0; i < priv->num_channels; i++)
		priv->channel[i]->buf_count = 0;
}

/* Function is called from softirq context only, so we don't need to guard
 * the access to percpu count
 */
static int refill_pool(struct dpaa2_eth_priv *priv,
		       struct dpaa2_eth_channel *ch,
		       u16 bpid)
{
	int new_count;

	if (likely(ch->buf_count >= DPAA2_ETH_REFILL_THRESH))
		return 0;

	do {
		new_count = add_bufs(priv, ch, bpid);
		if (unlikely(!new_count)) {
			/* Out of memory; abort for now, we'll try later on */
			break;
		}
		ch->buf_count += new_count;
	} while (ch->buf_count < DPAA2_ETH_NUM_BUFS);

	if (unlikely(ch->buf_count < DPAA2_ETH_NUM_BUFS))
		return -ENOMEM;

	return 0;
}

static int pull_channel(struct dpaa2_eth_channel *ch)
{
	int err;
	int dequeues = -1;

	/* Retry while portal is busy */
	do {
		err = dpaa2_io_service_pull_channel(ch->dpio, ch->ch_id,
						    ch->store);
		dequeues++;
		cpu_relax();
	} while (err == -EBUSY);

	ch->stats.dequeue_portal_busy += dequeues;
	if (unlikely(err))
		ch->stats.pull_err++;

	return err;
}

/* NAPI poll routine
 *
 * Frames are dequeued from the QMan channel associated with this NAPI context.
 * Rx, Tx confirmation and (if configured) Rx error frames all count
 * towards the NAPI budget.
 */
static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
{
	struct dpaa2_eth_channel *ch;
	struct dpaa2_eth_priv *priv;
	int rx_cleaned = 0, txconf_cleaned = 0;
	struct dpaa2_eth_fq *fq, *txc_fq = NULL;
	struct netdev_queue *nq;
	int store_cleaned, work_done;
	struct list_head rx_list;
	int err;

	ch = container_of(napi, struct dpaa2_eth_channel, napi);
	ch->xdp.res = 0;
	priv = ch->priv;

	INIT_LIST_HEAD(&rx_list);
	ch->rx_list = &rx_list;

	do {
		err = pull_channel(ch);
		if (unlikely(err))
			break;

		/* Refill pool if appropriate */
		refill_pool(priv, ch, priv->bpid);

		store_cleaned = consume_frames(ch, &fq);
		if (!store_cleaned)
			break;
		if (fq->type == DPAA2_RX_FQ) {
			rx_cleaned += store_cleaned;
		} else {
			txconf_cleaned += store_cleaned;
			/* We have a single Tx conf FQ on this channel */
			txc_fq = fq;
		}

		/* If we either consumed the whole NAPI budget with Rx frames
		 * or we reached the Tx confirmations threshold, we're done.
		 */
		if (rx_cleaned >= budget ||
		    txconf_cleaned >= DPAA2_ETH_TXCONF_PER_NAPI) {
			work_done = budget;
			goto out;
		}
	} while (store_cleaned);

	/* We didn't consume the entire budget, so finish napi and
	 * re-enable data availability notifications
	 */
	napi_complete_done(napi, rx_cleaned);
	do {
		err = dpaa2_io_service_rearm(ch->dpio, &ch->nctx);
		cpu_relax();
	} while (err == -EBUSY);
	WARN_ONCE(err, "CDAN notifications rearm failed on core %d",
		  ch->nctx.desired_cpu);

	work_done = max(rx_cleaned, 1);

out:
	netif_receive_skb_list(ch->rx_list);

	if (txc_fq && txc_fq->dq_frames) {
		nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid);
		netdev_tx_completed_queue(nq, txc_fq->dq_frames,
					  txc_fq->dq_bytes);
		txc_fq->dq_frames = 0;
		txc_fq->dq_bytes = 0;
	}

	if (ch->xdp.res & XDP_REDIRECT)
		xdp_do_flush_map();

	return work_done;
}

static void enable_ch_napi(struct dpaa2_eth_priv *priv)
{
	struct dpaa2_eth_channel *ch;
	int i;

	for (i = 0; i < priv->num_channels; i++) {
		ch = priv->channel[i];
		napi_enable(&ch->napi);
	}
}

static void disable_ch_napi(struct dpaa2_eth_priv *priv)
{
	struct dpaa2_eth_channel *ch;
	int i;

	for (i = 0; i < priv->num_channels; i++) {
		ch = priv->channel[i];
		napi_disable(&ch->napi);
	}
}

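/* Enable/disable taildrop on all Rx frame queues. Taildrop is configured only
 * while Tx pause frame generation is disabled (see link_state_update()).
 */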
static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
{
	struct dpni_taildrop td = {0};
	int i, err;

	if (priv->rx_td_enabled == enable)
		return;

	td.enable = enable;
	td.threshold = DPAA2_ETH_TAILDROP_THRESH;

	for (i = 0; i < priv->num_fqs; i++) {
		if (priv->fq[i].type != DPAA2_RX_FQ)
			continue;
		err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token,
					DPNI_CP_QUEUE, DPNI_QUEUE_RX, 0,
					priv->fq[i].flowid, &td);
		if (err) {
			netdev_err(priv->net_dev,
				   "dpni_set_taildrop() failed\n");
			break;
		}
	}

	priv->rx_td_enabled = enable;
}

static void update_tx_fqids(struct dpaa2_eth_priv *priv);

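/* Read the current DPNI link state and propagate changes to the net device
 * (carrier and Tx queues) and to the Rx taildrop configuration.
 */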
static int link_state_update(struct dpaa2_eth_priv *priv)
{
	struct dpni_link_state state = {0};
	bool tx_pause;
	int err;

	err = dpni_get_link_state(priv->mc_io, 0, priv->mc_token, &state);
	if (unlikely(err)) {
		netdev_err(priv->net_dev,
			   "dpni_get_link_state() failed\n");
		return err;
	}

	/* If Tx pause frame settings have changed, we need to update
	 * Rx FQ taildrop configuration as well. We configure taildrop
	 * only when pause frame generation is disabled.
	 */
	tx_pause = !!(state.options & DPNI_LINK_OPT_PAUSE) ^
		   !!(state.options & DPNI_LINK_OPT_ASYM_PAUSE);
	dpaa2_eth_set_rx_taildrop(priv, !tx_pause);

	/* Check link state; speed / duplex changes are not treated yet */
	if (priv->link_state.up == state.up)
		goto out;

	if (state.up) {
		update_tx_fqids(priv);
		netif_carrier_on(priv->net_dev);
		netif_tx_start_all_queues(priv->net_dev);
	} else {
		netif_tx_stop_all_queues(priv->net_dev);
		netif_carrier_off(priv->net_dev);
	}

	netdev_info(priv->net_dev, "Link Event: state %s\n",
		    state.up ? "up" : "down");

out:
	priv->link_state = state;

	return 0;
}

static int dpaa2_eth_open(struct net_device *net_dev)
{
	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
	int err;

	err = seed_pool(priv, priv->bpid);
	if (err) {
		/* Not much to do; the buffer pool, though not filled up,
		 * may still contain some buffers which would enable us
		 * to limp on.
		 */
		netdev_err(net_dev, "Buffer seeding failed for DPBP %d (bpid=%d)\n",
			   priv->dpbp_dev->obj_desc.id, priv->bpid);
	}

	/* We'll only start the txqs when the link is actually ready; make sure
	 * we don't race against the link up notification, which may come
	 * immediately after dpni_enable();
	 */
	netif_tx_stop_all_queues(net_dev);
	enable_ch_napi(priv);
	/* Also, explicitly set carrier off, otherwise netif_carrier_ok() will
	 * return true and cause 'ip link show' to report the LOWER_UP flag,
	 * even though the link notification wasn't even received.
	 */
	netif_carrier_off(net_dev);

	err = dpni_enable(priv->mc_io, 0, priv->mc_token);
	if (err < 0) {
		netdev_err(net_dev, "dpni_enable() failed\n");
		goto enable_err;
	}

	/* If the DPMAC object has already processed the link up interrupt,
	 * we have to learn the link state ourselves.
	 */
	err = link_state_update(priv);
	if (err < 0) {
		netdev_err(net_dev, "Can't update link state\n");
		goto link_state_err;
	}

	return 0;

link_state_err:
enable_err:
	disable_ch_napi(priv);
	drain_pool(priv);
	return err;
}

/* Total number of in-flight frames on ingress queues */
static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
{
	struct dpaa2_eth_fq *fq;
	u32 fcnt = 0, bcnt = 0, total = 0;
	int i, err;

	for (i = 0; i < priv->num_fqs; i++) {
		fq = &priv->fq[i];
		err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
		if (err) {
			netdev_warn(priv->net_dev, "query_fq_count failed");
			break;
		}
		total += fcnt;
	}

	return total;
}

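/* Wait, with a bounded number of retries, for the ingress frame queues to be
 * drained of in-flight frames.
 */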
static void wait_for_ingress_fq_empty(struct dpaa2_eth_priv *priv)
{
	int retries = 10;
	u32 pending;

	do {
		pending = ingress_fq_count(priv);
		if (pending)
			msleep(100);
	} while (pending && --retries);
}

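/* Wait for in-flight egress frames to be consumed. The pending-frames counter
 * is only available on DPNI versions >= 7.13; on older versions we simply
 * sleep for a fixed interval.
 */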
#define DPNI_TX_PENDING_VER_MAJOR	7
#define DPNI_TX_PENDING_VER_MINOR	13
static void wait_for_egress_fq_empty(struct dpaa2_eth_priv *priv)
{
	union dpni_statistics stats;
	int retries = 10;
	int err;

	if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_TX_PENDING_VER_MAJOR,
				   DPNI_TX_PENDING_VER_MINOR) < 0)
		goto out;

	do {
		err = dpni_get_statistics(priv->mc_io, 0, priv->mc_token, 6,
					  &stats);
		if (err)
			goto out;
		if (stats.page_6.tx_pending_frames == 0)
			return;
	} while (--retries);

out:
	msleep(500);
}