Skip to content

Commit 291961f

Browse files
ahunter6 authored and acmel committed
perf script: Add API for filtering via dynamically loaded shared object
In some cases, users want to filter very large amounts of data (e.g. from AUX area tracing like Intel PT) looking for something specific. While scripting such as Python can be used, Python is 10 to 20 times slower than C. So define a C API so that custom filters can be written and loaded. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Link: https://lore.kernel.org/r/20210627131818.810-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent c435c16 commit 291961f

7 files changed

Lines changed: 780 additions & 2 deletions

File tree

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
perf-dlfilter(1)
2+
================
3+
4+
NAME
5+
----
6+
perf-dlfilter - Filter sample events using a dynamically loaded shared
7+
object file
8+
9+
SYNOPSIS
10+
--------
11+
[verse]
12+
'perf script' [--dlfilter file.so]
13+
14+
DESCRIPTION
15+
-----------
16+
17+
This option is used to process data through a custom filter provided by a
18+
dynamically loaded shared object file.
19+
20+
If 'file.so' does not contain "/", then it will be found either in the current
21+
directory, or perf tools exec path which is ~/libexec/perf-core/dlfilters for
22+
a local build and install (refer perf --exec-path), or the dynamic linker
23+
paths.
24+
25+
API
26+
---
27+
28+
The API for filtering consists of the following:
29+
30+
[source,c]
31+
----
32+
#include <perf/perf_dlfilter.h>
33+
34+
const struct perf_dlfilter_fns perf_dlfilter_fns;
35+
36+
int start(void **data, void *ctx);
37+
int stop(void *data, void *ctx);
38+
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx);
39+
----
40+
41+
If implemented, 'start' will be called at the beginning, before any
42+
calls to 'filter_event'. Return 0 to indicate success,
43+
or return a negative error code. '*data' can be assigned for use by other
44+
functions. 'ctx' is needed for calls to perf_dlfilter_fns, but most
45+
perf_dlfilter_fns are not valid when called from 'start'.
46+
47+
If implemented, 'stop' will be called at the end, after any calls to
48+
'filter_event'. Return 0 to indicate success, or
49+
return a negative error code. 'data' is set by 'start'. 'ctx' is needed
50+
for calls to perf_dlfilter_fns, but most perf_dlfilter_fns are not valid
51+
when called from 'stop'.
52+
53+
If implemented, 'filter_event' will be called for each sample event.
54+
Return 0 to keep the sample event, 1 to filter it out, or return a negative
55+
error code. 'data' is set by 'start'. 'ctx' is needed for calls to
56+
'perf_dlfilter_fns'.
57+
58+
The perf_dlfilter_sample structure
59+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
60+
61+
'filter_event' is passed a perf_dlfilter_sample
62+
structure, which contains the following fields:
63+
[source,c]
64+
----
65+
/*
66+
* perf sample event information (as per perf script and <linux/perf_event.h>)
67+
*/
68+
struct perf_dlfilter_sample {
69+
__u32 size; /* Size of this structure (for compatibility checking) */
70+
__u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
71+
__u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
72+
__u64 ip;
73+
__s32 pid;
74+
__s32 tid;
75+
__u64 time;
76+
__u64 addr;
77+
__u64 id;
78+
__u64 stream_id;
79+
__u64 period;
80+
__u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */
81+
__u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */
82+
__u64 insn_cnt; /* For instructions-per-cycle (IPC) */
83+
__u64 cyc_cnt; /* For instructions-per-cycle (IPC) */
84+
__s32 cpu;
85+
__u32 flags; /* Refer PERF_DLFILTER_FLAG_* below */
86+
__u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */
87+
__u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */
88+
__u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */
89+
__u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */
90+
__u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */
91+
__u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */
92+
__u8 addr_correlates_sym; /* True => resolve_addr() can be called */
93+
__u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */
94+
__u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
95+
const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */
96+
__u64 brstack_nr; /* Number of brstack entries */
97+
const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */
98+
__u64 raw_callchain_nr; /* Number of raw_callchain entries */
99+
const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */
100+
const char *event;
101+
};
102+
----
103+
104+
The perf_dlfilter_fns structure
105+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
106+
107+
The 'perf_dlfilter_fns' structure is populated with function pointers when the
108+
file is loaded. The functions can be called by 'filter_event'.
109+
110+
[source,c]
111+
----
112+
struct perf_dlfilter_fns {
113+
const struct perf_dlfilter_al *(*resolve_ip)(void *ctx);
114+
const struct perf_dlfilter_al *(*resolve_addr)(void *ctx);
115+
void *(*reserved[126])(void *);
116+
};
117+
----
118+
119+
'resolve_ip' returns information about ip.
120+
121+
'resolve_addr' returns information about addr (if addr_correlates_sym).
122+
123+
The perf_dlfilter_al structure
124+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
125+
126+
The 'perf_dlfilter_al' structure contains information about an address.
127+
128+
[source,c]
129+
----
130+
/*
131+
* Address location (as per perf script)
132+
*/
133+
struct perf_dlfilter_al {
134+
__u32 size; /* Size of this structure (for compatibility checking) */
135+
__u32 symoff;
136+
const char *sym;
137+
__u64 addr; /* Mapped address (from dso) */
138+
__u64 sym_start;
139+
__u64 sym_end;
140+
const char *dso;
141+
__u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */
142+
__u8 is_64_bit; /* Only valid if dso is not NULL */
143+
__u8 is_kernel_ip; /* True if in kernel space */
144+
__u32 buildid_size;
145+
__u8 *buildid;
146+
/* Below members are only populated by resolve_ip() */
147+
__u8 filtered; /* true if this sample event will be filtered out */
148+
const char *comm;
149+
};
150+
----
151+
152+
perf_dlfilter_sample flags
153+
~~~~~~~~~~~~~~~~~~~~~~~~~~
154+
155+
The 'flags' member of 'perf_dlfilter_sample' corresponds with the flags field
156+
of perf script. The bits of the flags are as follows:
157+
158+
[source,c]
159+
----
160+
/* Definitions for perf_dlfilter_sample flags */
161+
enum {
162+
PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0,
163+
PERF_DLFILTER_FLAG_CALL = 1ULL << 1,
164+
PERF_DLFILTER_FLAG_RETURN = 1ULL << 2,
165+
PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3,
166+
PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4,
167+
PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5,
168+
PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6,
169+
PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7,
170+
PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8,
171+
PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9,
172+
PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10,
173+
PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11,
174+
PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12,
175+
};
176+
----
177+
178+
EXAMPLE
179+
-------
180+
181+
Filter out everything except branches from "foo" to "bar":
182+
183+
[source,c]
184+
----
185+
#include <perf/perf_dlfilter.h>
186+
#include <string.h>
187+
188+
const struct perf_dlfilter_fns perf_dlfilter_fns;
189+
190+
int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
191+
{
192+
const struct perf_dlfilter_al *al;
193+
const struct perf_dlfilter_al *addr_al;
194+
195+
if (!sample->ip || !sample->addr_correlates_sym)
196+
return 1;
197+
198+
al = perf_dlfilter_fns.resolve_ip(ctx);
199+
if (!al || !al->sym || strcmp(al->sym, "foo"))
200+
return 1;
201+
202+
addr_al = perf_dlfilter_fns.resolve_addr(ctx);
203+
if (!addr_al || !addr_al->sym || strcmp(addr_al->sym, "bar"))
204+
return 1;
205+
206+
return 0;
207+
}
208+
----
209+
210+
To build the shared object, assuming perf has been installed for the local user
211+
i.e. perf_dlfilter.h is in ~/include/perf:
212+
213+
gcc -c -I ~/include -fpic dlfilter-example.c
214+
gcc -shared -o dlfilter-example.so dlfilter-example.o
215+
216+
To use the filter with perf script:
217+
218+
perf script --dlfilter dlfilter-example.so
219+
220+
SEE ALSO
221+
--------
222+
linkperf:perf-script[1]

tools/perf/Documentation/perf-script.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ OPTIONS
9898
Generate perf-script.[ext] starter script for given language,
9999
using current perf.data.
100100

101+
--dlfilter=<file>::
102+
Filter sample events using the given shared object file.
103+
Refer linkperf:perf-dlfilter[1]
104+
101105
-a::
102106
Force system-wide collection. Scripts run without a <command>
103107
normally use -a by default, while scripts run with a <command>
@@ -483,4 +487,5 @@ include::itrace.txt[]
483487
SEE ALSO
484488
--------
485489
linkperf:perf-record[1], linkperf:perf-script-perl[1],
486-
linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]
490+
linkperf:perf-script-python[1], linkperf:perf-intel-pt[1],
491+
linkperf:perf-dlfilter[1]

tools/perf/builtin-script.c

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
#include <subcmd/pager.h>
5656
#include <perf/evlist.h>
5757
#include <linux/err.h>
58+
#include "util/dlfilter.h"
5859
#include "util/record.h"
5960
#include "util/util.h"
6061
#include "perf.h"
@@ -79,6 +80,7 @@ static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
7980
static struct perf_stat_config stat_config;
8081
static int max_blocks;
8182
static bool native_arch;
83+
static struct dlfilter *dlfilter;
8284

8385
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
8486

@@ -2175,6 +2177,7 @@ static int process_sample_event(struct perf_tool *tool,
21752177
struct perf_script *scr = container_of(tool, struct perf_script, tool);
21762178
struct addr_location al;
21772179
struct addr_location addr_al;
2180+
int ret = 0;
21782181

21792182
if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
21802183
sample->time)) {
@@ -2213,6 +2216,13 @@ static int process_sample_event(struct perf_tool *tool,
22132216
if (evswitch__discard(&scr->evswitch, evsel))
22142217
goto out_put;
22152218

2219+
ret = dlfilter__filter_event(dlfilter, event, sample, evsel, machine, &al, &addr_al);
2220+
if (ret) {
2221+
if (ret > 0)
2222+
ret = 0;
2223+
goto out_put;
2224+
}
2225+
22162226
if (scripting_ops) {
22172227
struct addr_location *addr_al_ptr = NULL;
22182228

@@ -2229,7 +2239,7 @@ static int process_sample_event(struct perf_tool *tool,
22292239

22302240
out_put:
22312241
addr_location__put(&al);
2232-
return 0;
2242+
return ret;
22332243
}
22342244

22352245
static int process_attr(struct perf_tool *tool, union perf_event *event,
@@ -3568,6 +3578,7 @@ int cmd_script(int argc, const char **argv)
35683578
};
35693579
struct utsname uts;
35703580
char *script_path = NULL;
3581+
const char *dlfilter_file = NULL;
35713582
const char **__argv;
35723583
int i, j, err = 0;
35733584
struct perf_script script = {
@@ -3615,6 +3626,7 @@ int cmd_script(int argc, const char **argv)
36153626
parse_scriptname),
36163627
OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
36173628
"generate perf-script.xx script in specified language"),
3629+
OPT_STRING(0, "dlfilter", &dlfilter_file, "file", "filter .so file name"),
36183630
OPT_STRING('i', "input", &input_name, "file", "input file name"),
36193631
OPT_BOOLEAN('d', "debug-mode", &debug_mode,
36203632
"do various checks like samples ordering and lost events"),
@@ -3933,6 +3945,12 @@ int cmd_script(int argc, const char **argv)
39333945
exit(-1);
39343946
}
39353947

3948+
if (dlfilter_file) {
3949+
dlfilter = dlfilter__new(dlfilter_file);
3950+
if (!dlfilter)
3951+
return -1;
3952+
}
3953+
39363954
if (!script_name) {
39373955
setup_pager();
39383956
use_browser = 0;
@@ -4032,6 +4050,10 @@ int cmd_script(int argc, const char **argv)
40324050
goto out_delete;
40334051
}
40344052

4053+
err = dlfilter__start(dlfilter, session);
4054+
if (err)
4055+
goto out_delete;
4056+
40354057
if (script_name) {
40364058
err = scripting_ops->start_script(script_name, argc, argv, session);
40374059
if (err)
@@ -4081,6 +4103,7 @@ int cmd_script(int argc, const char **argv)
40814103

40824104
if (script_started)
40834105
cleanup_scripting();
4106+
dlfilter__cleanup(dlfilter);
40844107
out:
40854108
return err;
40864109
}

tools/perf/util/Build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ perf-y += parse-regs-options.o
126126
perf-y += parse-sublevel-options.o
127127
perf-y += term.o
128128
perf-y += help-unknown-cmd.o
129+
perf-y += dlfilter.o
129130
perf-y += mem-events.o
130131
perf-y += vsprintf.o
131132
perf-y += units.o

0 commit comments

Comments
 (0)