Commit f5d28712 authored by David S. Miller's avatar David S. Miller
Browse files

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf



Daniel Borkmann says:

====================
pull-request: bpf 2021-05-26

The following pull-request contains BPF updates for your *net* tree.

We've added 14 non-merge commits during the last 14 day(s) which contain
a total of 17 files changed, 513 insertions(+), 231 deletions(-).

The main changes are:

1) Fix bpf_skb_change_head() helper to reset mac_len, from Jussi Maki.

2) Fix masking direction swap upon off-reg sign change, from Daniel Borkmann.

3) Fix BPF offloads in verifier by reordering driver callback, from Yinjun Zhang.

4) BPF selftest for ringbuf mmap ro/rw restrictions, from Andrii Nakryiko.

5) Follow-up fixes to nested bprintf per-cpu buffers, from Florent Revest.

6) Fix bpftool sock_release attach point help info, from Liu Jian.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 6dfa87b4 1bad6fd5
# SPDX-License-Identifier: GPL-2.0-only
obj-y += kernel/ mm/
obj-$(CONFIG_NET) += net/
obj-y += kernel/ mm/ net/
obj-$(CONFIG_KVM) += kvm/
obj-$(CONFIG_XEN) += xen/
obj-$(CONFIG_CRYPTO) += crypto/
......@@ -37,6 +37,7 @@ config BPF_SYSCALL
config BPF_JIT
bool "Enable BPF Just In Time compiler"
depends on BPF
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
depends on MODULES
help
......
......@@ -107,10 +107,12 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_inode_storage_get_proto;
case BPF_FUNC_inode_storage_delete:
return &bpf_inode_storage_delete_proto;
#ifdef CONFIG_NET
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &bpf_sk_storage_delete_proto;
#endif /* CONFIG_NET */
case BPF_FUNC_spin_lock:
return &bpf_spin_lock_proto;
case BPF_FUNC_spin_unlock:
......
......@@ -692,13 +692,15 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
return -EINVAL;
}
/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
/* Per-cpu temp buffers used by printf-like helpers to store the bprintf binary
* arguments representation.
*/
#define MAX_PRINTF_BUF_LEN 512
#define MAX_BPRINTF_BUF_LEN 512
/* Support executing three nested bprintf helper calls on a given CPU */
#define MAX_BPRINTF_NEST_LEVEL 3
struct bpf_bprintf_buffers {
char tmp_bufs[3][MAX_PRINTF_BUF_LEN];
char tmp_bufs[MAX_BPRINTF_NEST_LEVEL][MAX_BPRINTF_BUF_LEN];
};
static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs);
static DEFINE_PER_CPU(int, bpf_bprintf_nest_level);
......@@ -710,7 +712,7 @@ static int try_get_fmt_tmp_buf(char **tmp_buf)
preempt_disable();
nest_level = this_cpu_inc_return(bpf_bprintf_nest_level);
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) {
if (WARN_ON_ONCE(nest_level > MAX_BPRINTF_NEST_LEVEL)) {
this_cpu_dec(bpf_bprintf_nest_level);
preempt_enable();
return -EBUSY;
......@@ -761,7 +763,7 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
if (num_args && try_get_fmt_tmp_buf(&tmp_buf))
return -EBUSY;
tmp_buf_end = tmp_buf + MAX_PRINTF_BUF_LEN;
tmp_buf_end = tmp_buf + MAX_BPRINTF_BUF_LEN;
*bin_args = (u32 *)tmp_buf;
}
......
......@@ -6409,18 +6409,10 @@ enum {
};
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
const struct bpf_reg_state *off_reg,
u32 *alu_limit, u8 opcode)
u32 *alu_limit, bool mask_to_left)
{
bool off_is_neg = off_reg->smin_value < 0;
bool mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
(opcode == BPF_SUB && !off_is_neg);
u32 max = 0, ptr_limit = 0;
if (!tnum_is_const(off_reg->var_off) &&
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
return REASON_BOUNDS;
switch (ptr_reg->type) {
case PTR_TO_STACK:
/* Offset 0 is out-of-bounds, but acceptable start for the
......@@ -6486,15 +6478,20 @@ static bool sanitize_needed(u8 opcode)
return opcode == BPF_ADD || opcode == BPF_SUB;
}
struct bpf_sanitize_info {
struct bpf_insn_aux_data aux;
bool mask_to_left;
};
static int sanitize_ptr_alu(struct bpf_verifier_env *env,
struct bpf_insn *insn,
const struct bpf_reg_state *ptr_reg,
const struct bpf_reg_state *off_reg,
struct bpf_reg_state *dst_reg,
struct bpf_insn_aux_data *tmp_aux,
struct bpf_sanitize_info *info,
const bool commit_window)
{
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
struct bpf_verifier_state *vstate = env->cur_state;
bool off_is_imm = tnum_is_const(off_reg->var_off);
bool off_is_neg = off_reg->smin_value < 0;
......@@ -6515,7 +6512,16 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
if (vstate->speculative)
goto do_sim;
err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode);
if (!commit_window) {
if (!tnum_is_const(off_reg->var_off) &&
(off_reg->smin_value < 0) != (off_reg->smax_value < 0))
return REASON_BOUNDS;
info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
(opcode == BPF_SUB && !off_is_neg);
}
err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
if (err < 0)
return err;
......@@ -6523,8 +6529,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
/* In commit phase we narrow the masking window based on
* the observed pointer move after the simulated operation.
*/
alu_state = tmp_aux->alu_state;
alu_limit = abs(tmp_aux->alu_limit - alu_limit);
alu_state = info->aux.alu_state;
alu_limit = abs(info->aux.alu_limit - alu_limit);
} else {
alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
......@@ -6539,8 +6545,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
/* If we're in commit phase, we're done here given we already
* pushed the truncated dst_reg into the speculative verification
* stack.
*
* Also, when register is a known constant, we rewrite register-based
* operation to immediate-based, and thus do not need masking (and as
* a consequence, do not need to simulate the zero-truncation either).
*/
if (commit_window)
if (commit_window || off_is_imm)
return 0;
/* Simulate and find potential out-of-bounds access under
......@@ -6685,7 +6695,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
struct bpf_insn_aux_data tmp_aux = {};
struct bpf_sanitize_info info = {};
u8 opcode = BPF_OP(insn->code);
u32 dst = insn->dst_reg;
int ret;
......@@ -6754,7 +6764,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (sanitize_needed(opcode)) {
ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
&tmp_aux, false);
&info, false);
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
}
......@@ -6895,7 +6905,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
return -EACCES;
if (sanitize_needed(opcode)) {
ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
&tmp_aux, true);
&info, true);
if (ret < 0)
return sanitize_err(env, insn, ret, off_reg, dst_reg);
}
......@@ -13368,12 +13378,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (is_priv)
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
goto skip_full_check;
}
env->explored_states = kvcalloc(state_htab_size(env),
sizeof(struct bpf_verifier_state_list *),
GFP_USER);
......@@ -13401,6 +13405,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret < 0)
goto skip_full_check;
if (bpf_prog_is_dev_bound(env->prog->aux)) {
ret = bpf_prog_offload_verifier_prep(env->prog);
if (ret)
goto skip_full_check;
}
ret = check_cfg(env);
if (ret < 0)
goto skip_full_check;
......
......@@ -3784,6 +3784,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
}
return ret;
......
......@@ -30,7 +30,8 @@ CGROUP COMMANDS
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** |
| **sock_release** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
......@@ -106,6 +107,7 @@ DESCRIPTION
**getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
**getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
**getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
**sock_release** closing an userspace inet socket (since 5.9).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
......
......@@ -44,7 +44,7 @@ PROG COMMANDS
| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
| **cgroup/getsockopt** | **cgroup/setsockopt** |
| **cgroup/getsockopt** | **cgroup/setsockopt** | **cgroup/sock_release** |
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
......
......@@ -478,7 +478,7 @@ _bpftool()
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt struct_ops \
cgroup/setsockopt cgroup/sock_release struct_ops \
fentry fexit freplace sk_lookup" -- \
"$cur" ) )
return 0
......@@ -1021,7 +1021,7 @@ _bpftool()
device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
setsockopt'
setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
case $prev in
......@@ -1032,7 +1032,7 @@ _bpftool()
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
post_bind4|post_bind6|connect4|connect6|getpeername4|\
getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
......
......@@ -28,7 +28,8 @@
" connect6 | getpeername4 | getpeername6 |\n" \
" getsockname4 | getsockname6 | sendmsg4 |\n" \
" sendmsg6 | recvmsg4 | recvmsg6 |\n" \
" sysctl | getsockopt | setsockopt }"
" sysctl | getsockopt | setsockopt |\n" \
" sock_release }"
static unsigned int query_flags;
......
......@@ -2138,7 +2138,7 @@ static int do_help(int argc, char **argv)
" cgroup/getpeername4 | cgroup/getpeername6 |\n"
" cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
......
......@@ -86,8 +86,9 @@ void test_ringbuf(void)
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
pthread_t thread;
long bg_ret = -1;
int err, cnt;
int err, cnt, rb_fd;
int page_size = getpagesize();
void *mmap_ptr, *tmp_ptr;
skel = test_ringbuf__open();
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
......@@ -101,6 +102,52 @@ void test_ringbuf(void)
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
goto cleanup;
rb_fd = bpf_map__fd(skel->maps.ringbuf);
/* good read/write cons_pos */
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
goto cleanup;
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
/* bad writeable prod_pos */
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
err = -errno;
ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
/* bad writeable data pages */
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
err = -errno;
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
/* good read-only pages */
mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
goto cleanup;
ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
/* good read-only pages with initial offset */
mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
goto cleanup;
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
......
......@@ -33,8 +33,8 @@
a.s6_addr32[3] == b.s6_addr32[3])
#endif
static volatile const __u32 IFINDEX_SRC;
static volatile const __u32 IFINDEX_DST;
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
__be32 addr)
......
......@@ -5,11 +5,16 @@
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <bpf/bpf_helpers.h>
static volatile const __u32 IFINDEX_SRC;
static volatile const __u32 IFINDEX_DST;
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
SEC("classifier/chk_egress")
int tc_chk(struct __sk_buff *skb)
......@@ -29,4 +34,30 @@ int tc_src(struct __sk_buff *skb)
return bpf_redirect_peer(IFINDEX_DST, 0);
}
SEC("classifier/dst_ingress_l3")
int tc_dst_l3(struct __sk_buff *skb)
{
return bpf_redirect(IFINDEX_SRC, 0);
}
SEC("classifier/src_ingress_l3")
int tc_src_l3(struct __sk_buff *skb)
{
__u16 proto = skb->protocol;
if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
return TC_ACT_SHOT;
if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
return TC_ACT_SHOT;
if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
return TC_ACT_SHOT;
if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
return TC_ACT_SHOT;
return bpf_redirect_peer(IFINDEX_DST, 0);
}
char __license[] SEC("license") = "GPL";
......@@ -295,8 +295,6 @@
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
BPF_EXIT_INSN(),
},
.result_unpriv = REJECT,
.errstr_unpriv = "invalid write to stack R1 off=0 size=1",
.result = ACCEPT,
.retval = 42,
},
......
......@@ -300,8 +300,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
......@@ -371,8 +369,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
......@@ -472,8 +468,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
......@@ -766,8 +760,6 @@
},
.fixup_map_array_48b = { 3 },
.result = ACCEPT,
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
.retval = 1,
},
{
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment