On Thu, Dec 03, 2020 at 10:13:10PM -0800, Song Liu wrote:

SNIP

> +#include "bpf_skel/bpf_prog_profiler.skel.h"
> +
> +static inline void *u64_to_ptr(__u64 ptr)
> +{
> +     return (void *)(unsigned long)ptr;
> +}
> +
> +static void set_max_rlimit(void)
> +{
> +     struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
> +
> +     setrlimit(RLIMIT_MEMLOCK, &rinf);
> +}
> +
> +static inline struct bpf_counter *bpf_counter_alloc(void)

why is this inlined?

SNIP

> +static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id)
> +{
> +     struct bpf_prog_profiler_bpf *skel;
> +     struct bpf_counter *counter;
> +     struct bpf_program *prog;
> +     char *prog_name;
> +     int prog_fd;
> +     int err;
> +
> +     prog_fd = bpf_prog_get_fd_by_id(prog_id);
> +     if (prog_fd < 0) {
> +             pr_debug("Failed to open fd for bpf prog %u\n", prog_id);
> +             return -1;
> +     }
> +     counter = bpf_counter_alloc();
> +     if (!counter)
> +             return -1;
> +
> +     skel = bpf_prog_profiler_bpf__open();
> +     if (!skel) {
> +             pr_debug("Failed to load bpf skeleton\n");

I'm still getting the following error:

[root@dell-r440-01 perf]# ./perf stat -b 38
libbpf: elf: skipping unrecognized data section(9) .eh_frame
libbpf: elf: skipping relo section(15) .rel.eh_frame for section(9) .eh_frame
libbpf: XXX is not found in vmlinux BTF
libbpf: failed to load object 'bpf_prog_profiler_bpf'
libbpf: failed to load BPF skeleton 'bpf_prog_profiler_bpf': -2
...

with id 38 being:

38: tracepoint  name sys_enter  tag 03418b72a610af75  gpl
        loaded_at 2020-12-07T22:54:05+0100  uid 0
        xlated 272B  jited 153B  memlock 4096B  map_ids 1

How is this supposed to work when there's a literal XXX in the
program's section name (SEC("fentry/XXX"))? libbpf tries to find
XXX in the kernel BTF and, of course, fails.


> +             free(counter);
> +             return -1;
> +     }
> +     skel->rodata->num_cpu = evsel__nr_cpus(evsel);
> +
> +     bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel));
> +     bpf_map__resize(skel->maps.fentry_readings, 1);
> +     bpf_map__resize(skel->maps.accum_readings, 1);
> +

SNIP

> +static int bpf_program_profiler__read(struct evsel *evsel)
> +{
> +     int num_cpu = evsel__nr_cpus(evsel);
> +     struct bpf_perf_event_value values[num_cpu];
> +     struct bpf_counter *counter;
> +     int reading_map_fd;
> +     __u32 key = 0;
> +     int err, cpu;
> +
> +     if (list_empty(&evsel->bpf_counter_list))
> +             return -EAGAIN;
> +
> +     for (cpu = 0; cpu < num_cpu; cpu++) {
> +             perf_counts(evsel->counts, cpu, 0)->val = 0;
> +             perf_counts(evsel->counts, cpu, 0)->ena = 0;
> +             perf_counts(evsel->counts, cpu, 0)->run = 0;
> +     }
> +     list_for_each_entry(counter, &evsel->bpf_counter_list, list) {
> +             struct bpf_prog_profiler_bpf *skel = counter->skel;
> +
> +             reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
> +
> +             err = bpf_map_lookup_elem(reading_map_fd, &key, values);
> +             if (err) {
> +                     fprintf(stderr, "failed to read value\n");
> +                     return err;
> +             }
> +
> +             for (cpu = 0; cpu < num_cpu; cpu++) {
> +                     perf_counts(evsel->counts, cpu, 0)->val += 
> values[cpu].counter;
> +                     perf_counts(evsel->counts, cpu, 0)->ena += 
> values[cpu].enabled;
> +                     perf_counts(evsel->counts, cpu, 0)->run += 
> values[cpu].running;
> +             }

So we sum everything up across all the provided BPF IDs;
should we count/display them separately instead?

SNIP

> +SEC("fentry/XXX")
> +int BPF_PROG(fentry_XXX)
> +{
> +     __u32 key = bpf_get_smp_processor_id();
> +     struct bpf_perf_event_value reading;
> +     struct bpf_perf_event_value *ptr;
> +     __u32 zero = 0;
> +     long err;
> +
> +     /* look up before reading, to reduce error */
> +     ptr = bpf_map_lookup_elem(&fentry_readings, &zero);
> +     if (!ptr)
> +             return 0;
> +
> +     err = bpf_perf_event_read_value(&events, key, &reading,
> +                                     sizeof(reading));

Can't we read directly into ptr here?

SNIP

>       /* THREAD and SYSTEM/CPU are mutually exclusive */
>       if (target->per_thread && (target->system_wide || target->cpu_list)) {
>               target->per_thread = false;
> @@ -109,6 +137,10 @@ static const char *target__error_str[] = {
>       "PID/TID switch overriding SYSTEM",
>       "UID switch overriding SYSTEM",
>       "SYSTEM/CPU switch overriding PER-THREAD",
> +     "BPF switch overriding CPU",
> +     "BPF switch overriding PID/TID",
> +     "BPF switch overriding UID",
> +     "BPF switch overriding THREAD",
>       "Invalid User: %s",
>       "Problems obtaining information for user %s",
>  };
> @@ -134,7 +166,7 @@ int target__strerror(struct target *target, int errnum,
>  
>       switch (errnum) {
>       case TARGET_ERRNO__PID_OVERRIDE_CPU ...
> -          TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD:

Hmm, this removed line should stay, no?

thanks,
jirka

> +          TARGET_ERRNO__BPF_OVERRIDE_THREAD:
>               snprintf(buf, buflen, "%s", msg);
>               break;
>  
> diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h

SNIP

Reply via email to