Hi! I’ve been trying to follow the examples from Systems Performance by Brendan Gregg and got stuck on runqlat.bt from bpftrace and runqlat from bcc. With runqlat.bt from bpftrace:
❯ sudo runqlat.bt
WARNING: Could not find kernel headers in or . To specify a particular path to kernel headers, set the env variables BPFTRACE_KERNEL_SOURCE and, optionally, BPFTRACE_KERNEL_BUILD if the kernel was built in a different directory than its source. To create kernel headers run 'modprobe kheaders', which will create a tar file at /sys/kernel/kheaders.tar.xz
definitions.h:2:10: fatal error: 'linux/sched.h' file not found
And with runqlat from bcc:
❯ sudo runqlat
libbpf: failed to find valid kernel BTF
In file included from /virtual/main.c:2:
In file included from include/uapi/linux/ptrace.h:145:
In file included from arch/x86/include/asm/ptrace.h:5:
In file included from arch/x86/include/asm/segment.h:7:
arch/x86/include/asm/ibt.h:55:8: warning: 'nocf_check' attribute ignored; use -fcf-protection to enable the attribute [-Wignored-attributes]
extern __noendbr u64 ibt_save(bool disable);
^
arch/x86/include/asm/ibt.h:20:34: note: expanded from macro '__noendbr'
#define __noendbr __attribute__((nocf_check))
^
arch/x86/include/asm/ibt.h:56:8: warning: 'nocf_check' attribute ignored; use -fcf-protection to enable the attribute [-Wignored-attributes]
extern __noendbr void ibt_restore(u64 save);
^
arch/x86/include/asm/ibt.h:20:34: note: expanded from macro '__noendbr'
#define __noendbr __attribute__((nocf_check))
^
/virtual/main.c:94:15: error: no member named 'state' in 'struct task_struct'; did you mean 'stats'?
if (prev->state == TASK_RUNNING) {
^~~~~
stats
include/linux/sched.h:561:34: note: 'stats' declared here
struct sched_statistics stats;
^
/virtual/main.c:94:21: error: invalid operands to binary expression ('struct sched_statistics' and 'int')
if (prev->state == TASK_RUNNING) {
~~~~~~~~~~~ ^ ~~~~~~~~~~~~
2 warnings and 2 errors generated.
Traceback (most recent call last):
File "/nix/store/az8cga3nq065w10aifl3f9b7ix4319z4-bcc-0.24.0/share/bcc/tools/.runqlat-wrapped", line 296, in <module>
b = BPF(text=bpf_text)
File "/nix/store/az8cga3nq065w10aifl3f9b7ix4319z4-bcc-0.24.0/lib/python3.10/site-packages/bcc/__init__.py", line 475, in __init__
raise Exception("Failed to compile BPF module %s" % (src_file or "<text>"))
Exception: Failed to compile BPF module <text>
I’m able to make it work by changing STATE_FIELD to __state, which made me check what BPF.kernel_struct_has_field(b'task_struct', b'__state') does and what it returns:
❯ rg STATE_FIELD -C 5 $(nix-locate .runqlat-wrapped -p bcc | awk '{print $4}')
153-int trace_run(struct pt_regs *ctx, struct task_struct *prev)
154-{
155- u32 pid, tgid;
156-
157- // ivcsw: treat like an enqueue event and store timestamp
158: if (prev->STATE_FIELD == TASK_RUNNING) {
159- tgid = prev->tgid;
160- pid = prev->pid;
161- if (!(FILTER || pid == 0)) {
162- u64 ts = bpf_ktime_get_ns();
163- start.update(&pid, &ts);
--
207- struct task_struct *prev = (struct task_struct *)ctx->args[1];
208- struct task_struct *next = (struct task_struct *)ctx->args[2];
209- u32 pid, tgid;
210-
211- // ivcsw: treat like an enqueue event and store timestamp
212: if (prev->STATE_FIELD == TASK_RUNNING) {
213- tgid = prev->tgid;
214- pid = prev->pid;
215- if (!(FILTER || pid == 0)) {
216- u64 ts = bpf_ktime_get_ns();
217- start.update(&pid, &ts);
--
246-else:
247- bpf_text += bpf_text_kprobe
248-
249-# code substitutions
250-if BPF.kernel_struct_has_field(b'task_struct', b'__state') == 1:
251: bpf_text = bpf_text.replace('STATE_FIELD', '__state')
252-else:
253: bpf_text = bpf_text.replace('STATE_FIELD', 'state')
254-if args.pid:
255- # pid from userspace point of view is thread group from kernel pov
256- bpf_text = bpf_text.replace('FILTER', 'tgid != %s' % args.pid)
257-else:
258- bpf_text = bpf_text.replace('FILTER', '0')
It seems that this call ends up in bcc/src/cc/libbpf.c at 6d3d8a2aca2772dbd91644462852206387e296f2 · iovisor/bcc · GitHub, and the return value is -1. Since -1 is not equal to 1, the script falls back to the old field name, state.
This in turn suggests to me that the culprit for the issue is the following line:
libbpf: failed to find valid kernel BTF
This message is logged in libbpf here.
However, debugging with strace shows that the file exists:
openat(AT_FDCWD, "/sys/kernel/btf/vmlinux", O_RDONLY) = 3
libbpf: failed to find valid kernel BTF
What should I enable or change for the above tools to work on my host?
EDIT: I think it’s somehow the fault of my configuration (maybe ZFS?), since running the same code in a simple VM works fine: nix vm using flake.md · GitHub