BPF maps
/* Value type stored in each entry of the "syscalls" map: a single on/off flag. */
struct syscall {
bool enabled;
};
/*
 * Declare an ARRAY map named "syscalls" with int keys, struct syscall
 * values and room for 512 entries.
 */
bpf_map(syscalls, ARRAY, int, struct syscall, 512);
BPF maps: expanded
/* Value type stored in each entry of the "syscalls" map: a single on/off flag. */
struct syscall {
	bool enabled;
};

/* The map definition itself, emitted into the "maps" ELF section. */
struct bpf_map __attribute__((section("maps"), used)) syscalls = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(int),
	.value_size = sizeof(struct syscall),
	.max_entries = 512,
};

/* Companion struct whose members carry the map's key and value types. */
struct ____btf_map_syscalls {
	int key;
	struct syscall value;
};

/*
 * Instance of the companion struct, emitted into the ".maps.syscalls" ELF
 * section. The terminating ';' is the one written after the bpf_map(...)
 * invocation; without it this expansion is not a complete declaration.
 */
struct ____btf_map_syscalls __attribute__((section(".maps.syscalls"), used))
	____btf_map_syscalls = { };
bpftool
- Queries kernel about BPF
- progs
- maps
- map contents, lookups for values
perf trace
- BPF to collect syscall pointer payloads
- BPF maps for filtering
- And for telling what to collect
Putting a bpf prog + maps in place
# perf trace -a -e nanosleep sleep 100h
sleep/4227 nanosleep(0x7ffe0c7610b0, NULL) ...
gnome-terminal/2739 nanosleep(0x7ffffc45d9a0, 0x7ffffc45d9b0) = 0
gnome-terminal/2739 nanosleep(0x7ffffc45d9a0, 0x7ffffc45d9b0) = 0
gnome-terminal/2739 nanosleep(0x7ffffc45d9a0, 0x7ffffc45d9b0) = 0
bpftool progs
# bpftool prog | tail -6
547: tracepoint name sys_enter tag 819967866022f1e1 gpl
loaded_at 2019-01-26T00:33:15+0100 uid 0
xlated 528B jited 381B memlock 4096B map_ids 363,362,361
548: tracepoint name sys_exit tag c1bd85c092d6e4aa gpl
loaded_at 2019-01-26T00:33:15+0100 uid 0
xlated 256B jited 191B memlock 4096B map_ids 363,362
#
bpftool maps
# bpftool map | tail -6
361: perf_event_array name __augmented_sys flags 0x0
key 4B value 4B max_entries 8 memlock 4096B
362: array name syscalls flags 0x0
key 4B value 1B max_entries 512 memlock 8192B
363: hash name pids_filtered flags 0x0
key 4B value 1B max_entries 64 memlock 8192B
#
Dumping contents of a map
# grep -w nanosleep /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c
[35] = "nanosleep",
#
Dumping contents of a map
# bpftool map dump id 362 | head -40 | tail -10
key: 1e 00 00 00 value: 00
key: 1f 00 00 00 value: 00
key: 20 00 00 00 value: 00
key: 21 00 00 00 value: 00
key: 22 00 00 00 value: 00
key: 23 00 00 00 value: 01
key: 24 00 00 00 value: 00
key: 25 00 00 00 value: 00
key: 26 00 00 00 value: 00
key: 27 00 00 00 value: 00
#
bpftool map lookup
# bpftool map lookup id 362 key 35
Error: key expected 4 bytes got 1
# bpftool map lookup id 362 key 35 00 00 00
key: 23 00 00 00 value: 01
#
Generating BTF info
- DaveM notices CTF in a Solaris kernel image
- Hands me an initial .h with the main definitions
- pahole gets refactored to support multiple formats
- CTF being the first DWARF companion
- 10 years later: BPF needs this
- BTF
- llvm generates it as well, directly
pahole BTF encoder
- Implemented by Martin Lau @ FB
- Starting from ctf_loader.c
- dwarf loader
- Reads DWARF tags
- Intermediate format
- BTF encoder uses it
- Inserts a new .BTF ELF section
Encoding
$ cat test.c
struct A {
char b;
int a;
};
int test(struct A *t)
{
return t->a;
}
$ gcc -g -c test.c
$ file test.o
test.o: ELF 64-bit LSB relocatable, x86-64, version 1 (SYSV), with debug_info, not stripped
$
DWARF ELF sections
$ readelf -SW test.o | grep \.debug
[Nr] Name Type Addr Off Size
[ 4] .debug_info PROGBITS 0000 051 94
[ 5] .rela.debug_info RELA 0000 408 d8
[ 6] .debug_abbrev PROGBITS 0000 0e5 84
[ 7] .debug_aranges PROGBITS 0000 169 30
[ 8] .rela.debug_aranges RELA 0000 4e0 30
[ 9] .debug_line PROGBITS 0000 199 40
[10] .rela.debug_line RELA 0000 510 18
[11] .debug_str PROGBITS 0000 1d9 69
$
pahole using DWARF
$ pahole test.o
struct A {
char b; /* 0 1 */
/* XXX 3 bytes hole, try to pack */
int a; /* 4 4 */
/* size: 8, cachelines: 1, members: 2 */
/* sum members: 5, holes: 1, sum holes: 3 */
/* last cacheline: 8 bytes */
};
$
pahole encoding BTF
$ pahole -JV test.o
File test.o:
[1] STRUCT A kind_flag=0 size=8 vlen=2
b type_id=2 bits_offset=0
a type_id=3 bits_offset=32
[2] INT char size=1 bit_offset=0 nr_bits=8 encoding=(none)
[3] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[4] PTR (anon) type_id=1
$
BTF ELF section
$ readelf -SW test.o | grep \.BTF
[Nr] Name Type Addr Off Size
[19] .BTF PROGBITS 0000 619 7f
$
clang 8 generating BTF
$ clang -target bpf -g -c test.c
$ file test.o
test.o: ELF 64-bit LSB relocatable, eBPF, version 1 (SYSV), with debug_info, not stripped
$ readelf -SW test.o | grep \.BTF
[Nr] Name Type Addr Off Size
[ 8] .BTF PROGBITS 0000 23d dd
[ 9] .BTF.ext PROGBITS 0000 31a 68
[10] .rel.BTF.ext REL 0000 670 40
$
pahole BTF loader
- Reads BTF tags
- Intermediate format
- pretty prints
pahole decoding BTF
$ pahole -F btf test.o
struct A {
char b; /* 0 1 */
/* XXX 3 bytes hole, try to pack */
int a; /* 4 4 */
/* size: 8, cachelines: 1, members: 2 */
/* sum members: 5, holes: 1, sum holes: 3 */
/* last cacheline: 8 bytes */
};
$
kernel loading BTF
- libbpf notices ____btf_map_MAP_NAME
- In a ".maps." prefixed ELF section
- Collects that BTF data
- sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr)), with attr.btf pointing at btf_data
bpf_map(name)
/*
 * bpf_map() - declare a BPF map together with a description of its key and
 * value types.
 *
 * @name:         identifier of the map variable; also pasted into the
 *                companion struct name and its ELF section name
 * @_type:        map type suffix, pasted onto BPF_MAP_TYPE_ (e.g. ARRAY)
 * @type_key:     C type of the key; sets .key_size via sizeof
 * @type_val:     C type of the value; sets .value_size via sizeof
 * @_max_entries: value for .max_entries
 *
 * Expands to two definitions:
 *   1. a struct bpf_map named @name, placed with SEC("maps");
 *   2. a struct ____btf_map_<name> with 'key' and 'value' members of the
 *      given types, plus an instance of it placed in section ".maps.<name>".
 *
 * The expansion has no trailing ';', so the invocation must supply it.
 */
#define bpf_map(name, _type, type_key, type_val, _max_entries) \
struct bpf_map SEC("maps") name = { \
.type = BPF_MAP_TYPE_##_type, \
.key_size = sizeof(type_key), \
.value_size = sizeof(type_val), \
.max_entries = _max_entries, \
}; \
struct ____btf_map_##name { \
type_key key; \
type_val value; \
}; \
struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \
____btf_map_##name = { }
kernel validates BTF
- Validates header
- BTF_MAGIC
- BTF_VERSION
- flags
Some validations performed
$ grep btf_verifier_log kernel/bpf/btf.c
btf_verifier_log(env, "Exceeded max num of types");
btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
btf_verifier_log_type(env, t, "Unsupported encoding");
btf_verifier_log_type(env, t, "Invalid type_id");
btf_verifier_log_type(env, t, "Invalid name");
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
btf_verifier_log_type(env, t, "Expected size:%zu",
btf_verifier_log_type(env, t, "Loop detected");
btf_verifier_log(env, "Unaligned type_off");
btf_verifier_log(env, "No type found");
btf_verifier_log(env, "String section is not at the end");
btf_verifier_log(env, "Invalid string section");
btf_verifier_log(env, "Section overlap found");
btf_verifier_log(env, "Total section length too long");
btf_verifier_log(env, "Unsupported section found");
btf_verifier_log(env, "hdr_len not found");
btf_verifier_log(env, "btf_header not found");
btf_verifier_log(env, "Unsupported btf_header");
btf_verifier_log(env, "Invalid magic");
btf_verifier_log(env, "Unsupported version");
btf_verifier_log(env, "Unsupported flags");
$
kernel validating BTF (excerpts)
# perf ftrace -G '*btf*' perf trace -e *sleep sleep 1
7) | bpf_btf_load() {
7) | capable() {
7) 1.527 us | }
7) | btf_new_fd() {
7) 0.101 us | btf_sec_info_cmp();
7) | btf_struct_check_meta() {
7) 0.135 us | btf_name_valid_identifier.isra.12();
7) 0.109 us | __btf_verifier_log_type();
7) 0.107 us | btf_name_valid_identifier.isra.12();
7) 0.108 us | btf_verifier_log_member();
7) 3.642 us | }
7) | btf_int_check_meta() {
7) 0.100 us | __btf_verifier_log_type();
7) 0.315 us | }
7) | btf_ref_type_check_meta()
7) + 49.743 us | }
btfdiff
- pahole -F btf file.o
- pahole -F dwarf --flat_arrays file.o
- diff
- Should produce the same results
- Regression tests