Commit 9c590490 authored by David S. Miller
Browse files

Merge branch 'nfp-offload-LAG-for-tc-flower-egress'



Jakub Kicinski says:

====================
nfp: offload LAG for tc flower egress

This series from John adds bond offload to the nfp driver.  Patch 5
exposes the hash type for NETDEV_LAG_TX_TYPE_HASH to make sure nfp
hashing matches that of the software LAG.  This may be unnecessarily
conservative, let's see what LAG maintainers think :)

John says:

This patchset sets up the infrastructure and offloads output actions for
when a TC flower rule attempts to egress a packet to a LAG port.

Firstly it adds some of the infrastructure required to the flower app and
to the nfp core. This includes the ability to change the MAC address of a
repr, a function for combining lookup and write to a FW symbol, and the
addition of private data to a repr on a per app basis.

Patch 6 continues by implementing notifiers that track Linux bonds and
communicate to the FW those which enslave reprs, along with the current
state of reprs within the bond.

Patch 7 ensures bonds are synchronised with FW by receiving and acting
upon cmsgs sent to the kernel. These may request that a bond message is
retransmitted when FW can process it, or may request a full sync of the
bonds defined in the kernel.

Patch 8 offloads a flower action when that action requires egressing to a
pre-defined Linux bond.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d97cde6a 7e24a593
......@@ -1218,12 +1218,37 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
}
}
static enum netdev_lag_hash bond_lag_hash_type(struct bonding *bond,
enum netdev_lag_tx_type type)
{
if (type != NETDEV_LAG_TX_TYPE_HASH)
return NETDEV_LAG_HASH_NONE;
switch (bond->params.xmit_policy) {
case BOND_XMIT_POLICY_LAYER2:
return NETDEV_LAG_HASH_L2;
case BOND_XMIT_POLICY_LAYER34:
return NETDEV_LAG_HASH_L34;
case BOND_XMIT_POLICY_LAYER23:
return NETDEV_LAG_HASH_L23;
case BOND_XMIT_POLICY_ENCAP23:
return NETDEV_LAG_HASH_E23;
case BOND_XMIT_POLICY_ENCAP34:
return NETDEV_LAG_HASH_E34;
default:
return NETDEV_LAG_HASH_UNKNOWN;
}
}
static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
enum netdev_lag_tx_type type;
lag_upper_info.tx_type = bond_lag_tx_type(bond);
type = bond_lag_tx_type(bond);
lag_upper_info.tx_type = type;
lag_upper_info.hash_type = bond_lag_hash_type(bond, type);
return netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
&lag_upper_info, extack);
......
......@@ -37,6 +37,7 @@ ifeq ($(CONFIG_NFP_APP_FLOWER),y)
nfp-objs += \
flower/action.o \
flower/cmsg.o \
flower/lag_conf.o \
flower/main.o \
flower/match.o \
flower/metadata.o \
......
......@@ -72,6 +72,42 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
}
/* Prepend a pre_lag action when the mirred target is a LAG master.
 *
 * Returns 0 when the action's device is missing or is not a LAG master, the
 * size in bytes of the pre_lag action on success, -EOPNOTSUPP when the action
 * list would exceed NFP_FL_MAX_A_SIZ, or the error reported by
 * nfp_flower_lag_populate_pre_action().
 */
static int
nfp_fl_pre_lag(struct nfp_app *app, const struct tc_action *action,
	       struct nfp_fl_payload *nfp_flow, int act_len)
{
	const size_t pre_lag_sz = sizeof(struct nfp_fl_pre_lag);
	struct net_device *lag_dev = tcf_mirred_dev(action);
	struct nfp_fl_pre_lag *hdr;
	int ret;

	if (!lag_dev)
		return 0;
	if (!netif_is_lag_master(lag_dev))
		return 0;

	if (act_len + pre_lag_sz > NFP_FL_MAX_A_SIZ)
		return -EOPNOTSUPP;

	/* The pre_lag action must come first in the action list, so any
	 * actions already written need to be pushed forward to make room.
	 */
	if (act_len != 0)
		memmove(&nfp_flow->action_data[pre_lag_sz],
			nfp_flow->action_data, act_len);

	hdr = (struct nfp_fl_pre_lag *)nfp_flow->action_data;
	ret = nfp_flower_lag_populate_pre_action(app, lag_dev, hdr);
	if (ret != 0)
		return ret;

	hdr->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_LAG;
	hdr->head.len_lw = pre_lag_sz >> NFP_FL_LW_SIZ;

	nfp_flow->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);

	return pre_lag_sz;
}
static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
enum nfp_flower_tun_type tun_type)
{
......@@ -88,12 +124,13 @@ static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
}
static int
nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
struct nfp_fl_payload *nfp_flow, bool last,
struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
int *tun_out_cnt)
nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
const struct tc_action *action, struct nfp_fl_payload *nfp_flow,
bool last, struct net_device *in_dev,
enum nfp_flower_tun_type tun_type, int *tun_out_cnt)
{
size_t act_size = sizeof(struct nfp_fl_output);
struct nfp_flower_priv *priv = app->priv;
struct net_device *out_dev;
u16 tmp_flags;
......@@ -118,6 +155,15 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
output->flags = cpu_to_be16(tmp_flags |
NFP_FL_OUT_FLAGS_USE_TUN);
output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
} else if (netif_is_lag_master(out_dev) &&
priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
int gid;
output->flags = cpu_to_be16(tmp_flags);
gid = nfp_flower_lag_get_output_id(app, out_dev);
if (gid < 0)
return gid;
output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid);
} else {
/* Set action output parameters. */
output->flags = cpu_to_be16(tmp_flags);
......@@ -164,7 +210,7 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
struct nfp_fl_pre_tunnel *pre_tun_act;
/* Pre_tunnel action must be first on action list.
* If other actions already exist they need pushed forward.
* If other actions already exist they need to be pushed forward.
*/
if (act_len)
memmove(act_data + act_size, act_data, act_len);
......@@ -443,42 +489,73 @@ nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
}
static int
nfp_flower_loop_action(const struct tc_action *a,
nfp_flower_output_action(struct nfp_app *app, const struct tc_action *a,
struct nfp_fl_payload *nfp_fl, int *a_len,
struct net_device *netdev, bool last,
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
int *out_cnt)
{
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_output *output;
int err, prelag_size;
if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
err = nfp_fl_output(app, output, a, nfp_fl, last, netdev, *tun_type,
tun_out_cnt);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_output);
if (priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
/* nfp_fl_pre_lag returns -err or size of prelag action added.
* This will be 0 if it is not egressing to a lag dev.
*/
prelag_size = nfp_fl_pre_lag(app, a, nfp_fl, *a_len);
if (prelag_size < 0)
return prelag_size;
else if (prelag_size > 0 && (!last || *out_cnt))
return -EOPNOTSUPP;
*a_len += prelag_size;
}
(*out_cnt)++;
return 0;
}
static int
nfp_flower_loop_action(struct nfp_app *app, const struct tc_action *a,
struct nfp_fl_payload *nfp_fl, int *a_len,
struct net_device *netdev,
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
enum nfp_flower_tun_type *tun_type, int *tun_out_cnt,
int *out_cnt)
{
struct nfp_fl_set_ipv4_udp_tun *set_tun;
struct nfp_fl_pre_tunnel *pre_tun;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_pop_vlan *pop_v;
struct nfp_fl_output *output;
int err;
if (is_tcf_gact_shot(a)) {
nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_DROP);
} else if (is_tcf_mirred_egress_redirect(a)) {
if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
tun_out_cnt);
err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
true, tun_type, tun_out_cnt,
out_cnt);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_output);
} else if (is_tcf_mirred_egress_mirror(a)) {
if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
tun_out_cnt);
err = nfp_flower_output_action(app, a, nfp_fl, a_len, netdev,
false, tun_type, tun_out_cnt,
out_cnt);
if (err)
return err;
*a_len += sizeof(struct nfp_fl_output);
} else if (is_tcf_vlan(a) && tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ)
return -EOPNOTSUPP;
......@@ -535,11 +612,12 @@ nfp_flower_loop_action(const struct tc_action *a,
return 0;
}
int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
int nfp_flower_compile_action(struct nfp_app *app,
struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
int act_len, act_cnt, err, tun_out_cnt;
int act_len, act_cnt, err, tun_out_cnt, out_cnt;
enum nfp_flower_tun_type tun_type;
const struct tc_action *a;
LIST_HEAD(actions);
......@@ -550,11 +628,12 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
act_len = 0;
act_cnt = 0;
tun_out_cnt = 0;
out_cnt = 0;
tcf_exts_to_list(flow->exts, &actions);
list_for_each_entry(a, &actions, list) {
err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
&tun_type, &tun_out_cnt);
err = nfp_flower_loop_action(app, a, nfp_flow, &act_len, netdev,
&tun_type, &tun_out_cnt, &out_cnt);
if (err)
return err;
act_cnt++;
......
......@@ -239,8 +239,10 @@ nfp_flower_cmsg_portreify_rx(struct nfp_app *app, struct sk_buff *skb)
static void
nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_flower_priv *app_priv = app->priv;
struct nfp_flower_cmsg_hdr *cmsg_hdr;
enum nfp_flower_cmsg_type_port type;
bool skb_stored = false;
cmsg_hdr = nfp_flower_cmsg_get_hdr(skb);
......@@ -258,13 +260,20 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
nfp_tunnel_keep_alive(app, skb);
break;
case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG:
if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
skb_stored = nfp_flower_lag_unprocessed_msg(app, skb);
break;
}
/* fall through */
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
goto out;
}
dev_consume_skb_any(skb);
if (!skb_stored)
dev_consume_skb_any(skb);
return;
out:
dev_kfree_skb_any(skb);
......
......@@ -92,6 +92,7 @@
#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12
#define NFP_FL_ACTION_OPCODE_SET_UDP 14
#define NFP_FL_ACTION_OPCODE_SET_TCP 15
#define NFP_FL_ACTION_OPCODE_PRE_LAG 16
#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
#define NFP_FL_ACTION_OPCODE_NUM 32
......@@ -103,6 +104,9 @@
#define NFP_FL_PUSH_VLAN_CFI BIT(12)
#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
/* LAG ports */
#define NFP_FL_LAG_OUT 0xC0DE0000
/* Tunnel ports */
#define NFP_FL_PORT_TYPE_TUN 0x50000000
#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4)
......@@ -177,6 +181,15 @@ struct nfp_fl_pop_vlan {
__be16 reserved;
};
/* Pre-LAG action placed at the start of a flow's action list.
 *
 * head        - action header; nfp_fl_pre_lag() sets jump_id to
 *               NFP_FL_ACTION_OPCODE_PRE_LAG and len_lw from the struct size.
 * group_id    - big-endian LAG group ID of the bond master.
 * lag_version - first three bytes of the big-endian 32-bit value
 *               (batch_ver << NFP_FL_PRE_LAG_VER_OFF), as written by
 *               nfp_flower_lag_populate_pre_action().
 * instance    - group instance number of the LAG group.
 */
struct nfp_fl_pre_lag {
struct nfp_fl_act_head head;
__be16 group_id;
u8 lag_version[3];
u8 instance;
};
/* Left shift applied to the batch version before it is copied into
 * nfp_fl_pre_lag.lag_version.
 */
#define NFP_FL_PRE_LAG_VER_OFF 8
struct nfp_fl_pre_tunnel {
struct nfp_fl_act_head head;
__be16 reserved;
......@@ -366,6 +379,7 @@ struct nfp_flower_cmsg_hdr {
enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_ADD = 0,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
NFP_FLOWER_CMSG_TYPE_LAG_CONFIG = 4,
NFP_FLOWER_CMSG_TYPE_PORT_REIFY = 6,
NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
......
/*
* Copyright (C) 2018 Netronome Systems, Inc.
*
* This software is dual licensed under the GNU General License Version 2,
* June 1991 as shown in the file COPYING in the top-level directory of this
* source tree or the BSD 2-Clause License provided below. You have the
* option to license this software under the complete terms of either license.
*
* The BSD 2-Clause License:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "main.h"
/* LAG group config flags.
 * In nfp_fl_lag_config_group(), FIRST is set on the first message of a
 * batch, RESET is set when a reset is pending, and SWITCH together with
 * LAST marks the end of a batch.
 */
#define NFP_FL_LAG_LAST BIT(1)
#define NFP_FL_LAG_FIRST BIT(2)
#define NFP_FL_LAG_DATA BIT(3)
#define NFP_FL_LAG_XON BIT(4)
#define NFP_FL_LAG_SYNC BIT(5)
#define NFP_FL_LAG_SWITCH BIT(6)
#define NFP_FL_LAG_RESET BIT(7)
/* LAG port state flags. */
#define NFP_PORT_LAG_LINK_UP BIT(0)
#define NFP_PORT_LAG_TX_ENABLED BIT(1)
#define NFP_PORT_LAG_CHANGED BIT(2)
/* Progress of a batch of LAG config messages.
 * nfp_fl_lag_config_group() moves a batch from FIRST to MEMBER once the
 * first message has been flagged, and treats FINISHED as the request to
 * emit the end-of-batch flags.
 */
enum nfp_fl_lag_batch {
NFP_FL_LAG_BATCH_FIRST,
NFP_FL_LAG_BATCH_MEMBER,
NFP_FL_LAG_BATCH_FINISHED
};
/**
 * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
 * @ctrl_flags:	Configuration flags
 * @reserved:	Reserved for future use
 * @ttl:	Time to live of packet - host always sets to 0xff
 * @pkt_number:	Config message packet number - increment for each message
 * @batch_ver:	Batch version of messages - increment for each batch of messages
 * @group_id:	Group ID applicable
 * @group_inst:	Group instance number - increment when group is reused
 * @members:	Array of 32-bit words listing all active group members
 *
 * All multi-byte fields are declared big-endian (__be32). @members is a
 * flexible array member, so the payload size depends on the member count.
 */
struct nfp_flower_cmsg_lag_config {
u8 ctrl_flags;
u8 reserved[2];
u8 ttl;
__be32 pkt_number;
__be32 batch_ver;
__be32 group_id;
__be32 group_inst;
__be32 members[];
};
/**
 * struct nfp_fl_lag_group - list entry for each LAG group
 * @group_id:		Assigned group ID for host/kernel sync, allocated from
 *			the LAG IDA by nfp_fl_lag_group_create()
 * @group_inst:		Group instance in case of ID reuse
 * @list:		List entry
 * @master_ndev:	Group master Netdev
 * @dirty:		Marked if the group needs to be synced to HW
 * @offloaded:		Marked if the group is currently offloaded to NIC
 * @to_remove:		Marked if the group should be removed from NIC
 * @to_destroy:		Marked if the group should be removed from driver
 * @slave_cnt:		Number of slaves in group
 */
struct nfp_fl_lag_group {
unsigned int group_id;
u8 group_inst;
struct list_head list;
struct net_device *master_ndev;
bool dirty;
bool offloaded;
bool to_remove;
bool to_destroy;
unsigned int slave_cnt;
};
/* Wrap masks applied to the packet number and the batch version after each
 * increment: 31 bits and 23 bits respectively.
 */
#define NFP_FL_LAG_PKT_NUMBER_MASK GENMASK(30, 0)
#define NFP_FL_LAG_VERSION_MASK GENMASK(22, 0)
#define NFP_FL_LAG_HOST_TTL 0xff
/* Use this ID with zero members to ack a batch config */
#define NFP_FL_LAG_SYNC_ID 0
/* Group ID allocation range; the upper bound is exclusive. */
#define NFP_FL_LAG_GROUP_MIN 1 /* ID 0 reserved */
#define NFP_FL_LAG_GROUP_MAX 32 /* IDs 1 to 31 are valid */
/* wait for more config */
#define NFP_FL_LAG_DELAY (msecs_to_jiffies(2))
#define NFP_FL_LAG_RETRANS_LIMIT 100 /* max retrans cmsgs to store */
/* Advance the LAG config packet counter, wrapping it within
 * NFP_FL_LAG_PKT_NUMBER_MASK, and return the updated value.
 */
static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
{
	lag->pkt_num = (lag->pkt_num + 1) & NFP_FL_LAG_PKT_NUMBER_MASK;

	return lag->pkt_num;
}
/* Move the batch version on to its next value.
 *
 * Firmware ignores the least significant bit, so the version advances in
 * steps of two and wraps within NFP_FL_LAG_VERSION_MASK. Zero is reserved by
 * firmware, so a wrap to zero is bumped on to two.
 */
static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
{
	unsigned int next = (lag->batch_ver + 2) & NFP_FL_LAG_VERSION_MASK;

	if (next == 0)
		next = 2;

	lag->batch_ver = next;
}
/* Allocate a LAG group for @master, give it a fresh group ID and instance
 * number, and append it to the LAG group list.
 *
 * Returns the new group, or an ERR_PTR() carrying the IDA error when no group
 * ID is available, or -ENOMEM when the group cannot be allocated.
 */
static struct nfp_fl_lag_group *
nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
{
	struct nfp_flower_priv *priv =
		container_of(lag, struct nfp_flower_priv, nfp_lag);
	struct nfp_fl_lag_group *grp;
	int gid;

	gid = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
			     NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
	if (gid < 0) {
		nfp_flower_cmsg_warn(priv->app,
				     "No more bonding groups available\n");
		return ERR_PTR(gid);
	}

	grp = kmalloc(sizeof(*grp), GFP_KERNEL);
	if (!grp) {
		/* Give the ID back so it can be reused later. */
		ida_simple_remove(&lag->ida_handle, gid);
		return ERR_PTR(-ENOMEM);
	}

	/* Identity of the group. */
	grp->master_ndev = master;
	grp->group_id = gid;
	grp->group_inst = ++lag->global_inst;

	/* A new group has no slaves and still needs its first sync. */
	grp->slave_cnt = 0;
	grp->dirty = true;
	grp->offloaded = false;
	grp->to_remove = false;
	grp->to_destroy = false;

	list_add_tail(&grp->list, &lag->group_list);

	return grp;
}
static struct nfp_fl_lag_group *
nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
struct net_device *master)
{
struct nfp_fl_lag_group *entry;
if (!master)
return NULL;
list_for_each_entry(entry, &lag->group_list, list)
if (entry->master_ndev == master)
return entry;
return NULL;
}
int nfp_flower_lag_populate_pre_action(struct nfp_app *app,
struct net_device *master,
struct nfp_fl_pre_lag *pre_act)
{
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_lag_group *group = NULL;
__be32 temp_vers;
mutex_lock(&priv->nfp_lag.lock);
group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
master);
if (!group) {
mutex_unlock(&priv->nfp_lag.lock);
return -ENOENT;
}
pre_act->group_id = cpu_to_be16(group->group_id);
temp_vers = cpu_to_be32(priv->nfp_lag.batch_ver <<
NFP_FL_PRE_LAG_VER_OFF);
memcpy(pre_act->lag_version, &temp_vers, 3);
pre_act->instance = group->group_inst;
mutex_unlock(&priv->nfp_lag.lock);
return 0;
}
int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device *master)
{
struct nfp_flower_priv *priv = app->priv;
struct nfp_fl_lag_group *group = NULL;
int group_id = -ENOENT;
mutex_lock(&priv->nfp_lag.lock);
group = nfp_fl_lag_find_group_for_master_with_lag(&priv->nfp_lag,
master);
if (group)
group_id = group->group_id;
mutex_unlock(&priv->nfp_lag.lock);
return group_id;
}
static int
nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
struct net_device **active_members,
unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
{
struct nfp_flower_cmsg_lag_config *cmsg_payload;
struct nfp_flower_priv *priv;
unsigned long int flags;
unsigned int size, i;
struct sk_buff *skb;
priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
skb = nfp_flower_cmsg_alloc(priv->app, size,
NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
GFP_KERNEL);
if (!skb)
return -ENOMEM;
cmsg_payload = nfp_flower_cmsg_get_data(skb);
flags = 0;
/* Increment batch version for each new batch of config messages. */
if (*batch == NFP_FL_LAG_BATCH_FIRST) {
flags |= NFP_FL_LAG_FIRST;
nfp_fl_increment_version(lag);
*batch = NFP_FL_LAG_BATCH_MEMBER;
}
/* If it is a reset msg then it is also the end of the batch. */
if (lag->rst_cfg) {
flags |= NFP_FL_LAG_RESET;
*batch = NFP_FL_LAG_BATCH_FINISHED;
}
/* To signal the end of a batch, both the switch and last flags are set
* and the reserved SYNC group ID is used.
*/
if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
lag->rst_cfg = false;