Commit 48516438 authored by David S. Miller's avatar David S. Miller
Browse files

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf



Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains ten Netfilter/IPVS fixes, they are:

1) Address refcount leak when creating an expectation from the ctnetlink
   interface.

2) Fix bug splat in the IDLETIMER target related to sysfs, from Dmitry
   Torokhov.

3) Resolve panic for unreachable route in IPVS with locally generated
   traffic in the output path, from Alex Gartrell.

4) Fix wrong source address in rare cases for tunneled traffic in IPVS,
   from Julian Anastasov.

5) Fix crash if scheduler is changed via ipvsadm -E, again from Julian.

6) Make sure skb->sk is unset for forwarded traffic through IPVS, again from
   Alex Gartrell.

7) Fix crash with IPVS sync protocol v0 and FTP, from Julian.

8) Reset sender cpu for forwarded traffic in IPVS, also from Julian.

9) Allocate template conntracks through kmalloc() to resolve netns dependency
   problems with the conntrack kmem_cache.

10) Fix zones with expectations that clash using the same tuple, from Joe
    Stringer.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents cc9f4daa 4b31814d
......@@ -291,7 +291,7 @@ extern unsigned int nf_conntrack_max;
extern unsigned int nf_conntrack_hash_rnd;
void init_nf_conntrack_hash_rnd(void);
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl);
struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
......
......@@ -68,7 +68,6 @@ struct ct_pcpu {
spinlock_t lock;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct hlist_nulls_head tmpl;
};
struct netns_ct {
......
......@@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* return *ignored=0 i.e. ICMP and NF_DROP
*/
sched = rcu_dereference(svc->scheduler);
dest = sched->schedule(svc, skb, iph);
if (sched) {
/* read svc->sched_data after svc->scheduler */
smp_rmb();
dest = sched->schedule(svc, skb, iph);
} else {
dest = NULL;
}
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
......@@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
sched = rcu_dereference(svc->scheduler);
dest = sched->schedule(svc, skb, iph);
if (sched) {
/* read svc->sched_data after svc->scheduler */
smp_rmb();
dest = sched->schedule(svc, skb, iph);
} else {
dest = NULL;
}
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
......
......@@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
ip_vs_start_estimator(svc->net, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
if (sched->add_dest)
sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched && sched->add_dest)
sched->add_dest(svc, dest);
} else {
if (sched->upd_dest)
sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched && sched->upd_dest)
sched->upd_dest(svc, dest);
}
}
......@@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched;
sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched->del_dest)
if (sched && sched->del_dest)
sched->del_dest(svc, dest);
}
}
......@@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ip_vs_use_count_inc();
/* Lookup the scheduler by 'u->sched_name' */
sched = ip_vs_scheduler_get(u->sched_name);
if (sched == NULL) {
pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
ret = -ENOENT;
goto out_err;
if (strcmp(u->sched_name, "none")) {
sched = ip_vs_scheduler_get(u->sched_name);
if (!sched) {
pr_info("Scheduler module ip_vs_%s not found\n",
u->sched_name);
ret = -ENOENT;
goto out_err;
}
}
if (u->pe_name && *u->pe_name) {
......@@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
ret = ip_vs_bind_scheduler(svc, sched);
if (ret)
goto out_err;
sched = NULL;
if (sched) {
ret = ip_vs_bind_scheduler(svc, sched);
if (ret)
goto out_err;
sched = NULL;
}
/* Bind the ct retriever */
RCU_INIT_POINTER(svc->pe, pe);
......@@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
struct ip_vs_scheduler *sched, *old_sched;
struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
/*
* Lookup the scheduler, by 'u->sched_name'
*/
sched = ip_vs_scheduler_get(u->sched_name);
if (sched == NULL) {
pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
return -ENOENT;
if (strcmp(u->sched_name, "none")) {
sched = ip_vs_scheduler_get(u->sched_name);
if (!sched) {
pr_info("Scheduler module ip_vs_%s not found\n",
u->sched_name);
return -ENOENT;
}
}
old_sched = sched;
......@@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
if (old_sched) {
ip_vs_unbind_scheduler(svc, old_sched);
RCU_INIT_POINTER(svc->scheduler, NULL);
/* Wait all svc->sched_data users */
synchronize_rcu();
}
/* Bind the new scheduler */
ret = ip_vs_bind_scheduler(svc, sched);
if (ret) {
old_sched = sched;
goto out;
if (sched) {
ret = ip_vs_bind_scheduler(svc, sched);
if (ret) {
ip_vs_scheduler_put(sched);
goto out;
}
}
/* Unbind the old scheduler on success */
ip_vs_unbind_scheduler(svc, old_sched);
}
/*
......@@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
......@@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
sched->name);
sched_name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
sched->name,
sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
svc->fwmark, sched->name,
svc->fwmark, sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
......@@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
struct ip_vs_kstats kstats;
char *sched_name;
sched = rcu_dereference_protected(src->scheduler, 1);
sched_name = sched ? sched->name : "none";
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
......@@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
struct ip_vs_kstats kstats;
char *sched_name;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
......@@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
}
sched = rcu_dereference_protected(svc->scheduler, 1);
sched_name = sched ? sched->name : "none";
pe = rcu_dereference_protected(svc->pe, 1);
if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
(pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
......
......@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
if (sched->done_service)
sched->done_service(svc);
/* svc->scheduler can not be set to NULL */
/* svc->scheduler can be set to NULL only by caller */
}
......@@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
struct ip_vs_scheduler *sched;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
char *sched_name = sched ? sched->name : "none";
sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
sched->name, svc->fwmark, svc->fwmark, msg);
sched_name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
sched->name, ip_vs_proto_name(svc->protocol),
sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
sched->name, ip_vs_proto_name(svc->protocol),
sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
......
......@@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
ip_vs_sync_conn(net, cp->control, pkts);
ip_vs_sync_conn(net, cp, pkts);
}
}
......
......@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
......@@ -505,6 +504,13 @@ err_put:
return -1;
err_unreach:
/* The ip6_link_failure function requires the dev field to be set
* in order to get the net (further for the sake of fwmark
* reflection).
*/
if (!skb->dev)
skb->dev = skb_dst(skb)->dev;
dst_link_failure(skb);
return -1;
}
......@@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset(skb);
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
}
return ret;
}
/* In the event of a remote destination, it's possible that we would have
* matches against an old socket (particularly a TIME-WAIT socket). This
* causes havoc down the line (ip_local_out et. al. expect regular sockets
* and invalid memory accesses will happen) so simply drop the association
* in this case.
*/
static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
{
/* If dev is set, the packet came from the LOCAL_IN callback and
* not from a local TCP socket.
*/
if (skb->dev)
skb_orphan(skb);
}
/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
struct ip_vs_conn *cp, int local)
......@@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 1);
/* Remove the early_demux association unless it's bound for the
* exact same port and address on this host after translation.
*/
if (!local || cp->vport != cp->dport ||
!ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
ip_vs_drop_early_demux_sk(skb);
if (!local) {
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output_sk);
} else
ret = NF_ACCEPT;
return ret;
}
......@@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
ip_vs_notrack(skb);
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output_sk);
} else
......@@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
struct ipv6hdr *old_ipv6h = NULL;
#endif
ip_vs_drop_early_demux_sk(skb);
if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb)
......
......@@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
spin_unlock(&pcpu->lock);
}
/* Released via destroy_conntrack() */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
{
struct nf_conn *tmpl;
tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL);
if (tmpl == NULL)
return NULL;
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
#ifdef CONFIG_NF_CONNTRACK_ZONES
if (zone) {
struct nf_conntrack_zone *nf_ct_zone;
nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC);
if (!nf_ct_zone)
goto out_free;
nf_ct_zone->id = zone;
}
#endif
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
kfree(tmpl);
return NULL;
#endif
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
static void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
nf_ct_ext_free(tmpl);
kfree(tmpl);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
......@@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto && l4proto->destroy)
......@@ -540,28 +584,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
/* deletion from this larval template list happens via nf_ct_put() */
void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
{
struct ct_pcpu *pcpu;
__set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
__set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
nf_conntrack_get(&tmpl->ct_general);
/* add this conntrack to the (per cpu) tmpl list */
local_bh_disable();
tmpl->cpu = smp_processor_id();
pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu);
spin_lock(&pcpu->lock);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&pcpu->tmpl);
spin_unlock_bh(&pcpu->lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
......@@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net)
spin_lock_init(&pcpu->lock);
INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL);
INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL);
INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL);
}
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
......
......@@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
nf_ct_zone(a->master) == nf_ct_zone(b->master);
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
......
......@@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
err = nf_ct_expect_related_report(exp, portid, report);
if (err < 0)
goto err_exp;
return 0;
err_exp:
nf_ct_expect_put(exp);
err_ct:
nf_ct_put(ct);
......
......@@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net)
static int __net_init synproxy_net_init(struct net *net)
{
struct synproxy_net *snet = synproxy_pernet(net);
struct nf_conntrack_tuple t;
struct nf_conn *ct;
int err = -ENOMEM;
memset(&t, 0, sizeof(t));
ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL);
ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
if (IS_ERR(ct)) {
err = PTR_ERR(ct);
goto err1;
......@@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
if (!nfct_synproxy_ext_add(ct))
goto err2;
nf_conntrack_tmpl_insert(net, ct);
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
......
......@@ -184,7 +184,6 @@ out:
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
struct nf_conntrack_tuple t;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
......@@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err1;
memset(&t, 0, sizeof(t));
ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
ret = PTR_ERR(ct);
if (IS_ERR(ct))
goto err2;
......@@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err3;
}
nf_conntrack_tmpl_insert(par->net, ct);
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
nf_conntrack_get(&ct->ct_general);
out:
info->ct = ct;
return 0;
......
......@@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
goto out;
}
sysfs_attr_init(&info->timer->attr.attr);
info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL);
if (!info->timer->attr.attr.name) {
ret = -ENOMEM;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment