diff --git a/src/common/ipc.h b/src/common/ipc.h index e945faf46a7fc71db6c867a4a9537d646831c213..b56b3a46ec17da4449ff24d9a91174d5f0640c1f 100644 --- a/src/common/ipc.h +++ b/src/common/ipc.h @@ -160,6 +160,7 @@ #define PROBE_RANGE_TPROFILING_ONCPU_SAMPLE 0x00000080 #define PROBE_RANGE_TPROFILING_MEM_USAGE 0x00000100 #define PROBE_RANGE_TPROFILING_MEM_GLIBC 0x00000200 +#define PROBE_RANGE_TPROFILING_OFFCPU 0x00000400 /* hardware subprobe define */ #define PROBE_RANGE_HW_NIC 0x00000001 diff --git a/src/lib/probe/probe_mng.c b/src/lib/probe/probe_mng.c index 24027ce99fc0a81c6be213d38980ca0634b24975..1547537a6935a464ee787fcbfaa4e05a611edc98 100644 --- a/src/lib/probe/probe_mng.c +++ b/src/lib/probe/probe_mng.c @@ -126,6 +126,7 @@ struct probe_range_define_s probe_range_define[] = { {PROBE_TP, "oncpu_sample", PROBE_RANGE_TPROFILING_ONCPU_SAMPLE}, {PROBE_TP, "mem_usage", PROBE_RANGE_TPROFILING_MEM_USAGE}, {PROBE_TP, "mem_glibc", PROBE_RANGE_TPROFILING_MEM_GLIBC}, + {PROBE_TP, "offcpu", PROBE_RANGE_TPROFILING_OFFCPU}, {PROBE_HW, "hw_nic", PROBE_RANGE_HW_NIC}, {PROBE_HW, "hw_mem", PROBE_RANGE_HW_MEM}, diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.c b/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.c index c42532d7277d396550df87fec4bdd96e286e5ef2..95eccc9e3745229c294ee2a1410bb6ffd716da47 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.c +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.c @@ -39,6 +39,7 @@ #include "syscall_lock.skel.h" #include "syscall_sched.skel.h" #include "oncpu.skel.h" +#include "offcpu.skel.h" #include "pygc.skel.h" #include "pthrd_sync.skel.h" #include "mem_glibc.skel.h" @@ -171,6 +172,50 @@ int load_oncpu_bpf_prog(struct ipc_body_s *ipc_body, struct bpf_prog_s *prog) return 0; } +static int __load_offcpu_bpf_prog(struct bpf_prog_s *prog, char is_load) +{ + int ret = 0; + + LOAD_OFFCPU_PROBE(offcpu, err, is_load, &tprofiler.pbMgmt); + if (is_load) { + prog->skels[prog->num].skel = offcpu_skel; + prog->skels[prog->num].fn = (skel_destroy_fn)offcpu_bpf__destroy; + prog->custom_btf_paths[prog->num] = offcpu_open_opts.btf_custom_path; + + int is_attach_tp = (probe_kernel_version() >= KERNEL_VERSION(6, 4, 0)); + PROG_ENABLE_ONLY_IF(offcpu, bpf_raw_trace_sched_switch, is_attach_tp); + PROG_ENABLE_ONLY_IF(offcpu, bpf_finish_task_switch, !is_attach_tp); + + LOAD_ATTACH(tprofiling, offcpu, err, is_load); + + ret = open_profiling_bpf_buffer(&tprofiler.pbMgmt); + if (ret) { + goto err; + } + + prog->num++; + } + + return ret; +err: + UNLOAD(offcpu); + CLEANUP_CUSTOM_BTF(offcpu); + return -1; +} + +int load_offcpu_bpf_prog(struct ipc_body_s *ipc_body, struct bpf_prog_s *prog) +{ + + char is_load_offcpu; + + is_load_offcpu = is_load_probe_ipc(ipc_body, PROBE_RANGE_TPROFILING_OFFCPU); + if (__load_offcpu_bpf_prog(prog, is_load_offcpu)) { + return -1; + } + + return 0; +} + proc_ubpf_link_t *proc_link_tbl = NULL; #define MAX_PROC_LINK_TBL_NUM 100 #define BPF_FUNC_NAME_LEN 64 @@ -883,6 +928,11 @@ int load_profiling_bpf_progs(struct ipc_body_s *ipc_body) TP_ERROR("Failed to load oncpu bpf prog\n"); goto err; } + ret = load_offcpu_bpf_prog(ipc_body, prog); + if (ret) { + TP_ERROR("Failed to load offcpu bpf prog\n"); + goto err; + } ret = load_pygc_bpf_prog(ipc_body, prog); if (ret) { TP_ERROR("Failed to load pygc bpf prog\n"); diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.h b/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.h index 0bec0cc269d146a2b28145e967653cc6cc81abce..2ea8bf24e7f878b5b773f4d15896c9d2bc2975a3 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.h +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/bpf_prog.h @@ -79,6 +79,9 @@ #define LOAD_ONCPU_PROBE(probe_name, end, load, pbMgmt) \ LOAD_PROBE_COMMON(probe_name, end, load, pbMgmt) +#define LOAD_OFFCPU_PROBE(probe_name, end, load, pbMgmt) \ + LOAD_PROBE_COMMON_WITH_STACK(probe_name, end, load, pbMgmt); \ + #define LOAD_PYGC_PROBE(probe_name, end, load, pbMgmt) \ LOAD_PROBE_COMMON_WITH_STACK(probe_name, end, load, pbMgmt); \ LOAD_ATTACH(tprofiling, probe_name, end, load); diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/offcpu.bpf.c b/src/probes/extends/ebpf.probe/src/tprofilingprobe/offcpu.bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..f815ea557227a3cdcb11004f1d1f42736ae7de9b --- /dev/null +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/offcpu.bpf.c @@ -0,0 +1,248 @@ +/* + * bpf code runs in the Linux kernel + * Copyright (c) Huawei Technologies Co., Ltd. 2024. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#ifdef BPF_PROG_USER +#undef BPF_PROG_USER +#endif +#define BPF_PROG_KERN +#include "bpf.h" +#include "tprofiling.h" + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(offcpu_m_enter_t)); + __uint(max_entries, MAX_SIZE_OF_THREAD); +} offcpu_enter_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(trace_event_data_t)); + __uint(max_entries, MAX_SIZE_OF_THREAD); +} offcpu_stash_map SEC(".maps"); + +static __always_inline void init_offcpu_event_common(trace_event_data_t *evt_data, + struct task_struct *task) +{ + evt_data->type = EVT_TYPE_OFFCPU; + evt_data->pid = BPF_CORE_READ(task, pid); + evt_data->tgid = BPF_CORE_READ(task, tgid); + bpf_core_read_str(evt_data->comm, sizeof(evt_data->comm), &task->comm); +} + +static __always_inline void init_offcpu_data(offcpu_data_t *offcpu_d, offcpu_m_enter_t *offcpu_enter, + void *ctx, int ctx_is_prev) +{ + offcpu_d->start_time = offcpu_enter->start_time; + offcpu_d->end_time = offcpu_enter->end_time; + offcpu_d->duration = offcpu_enter->end_time - offcpu_enter->start_time; + offcpu_d->count = 1; + __builtin_memset(&offcpu_d->stats_stack, 0, sizeof(struct stats_stack_elem)); + + + if (ctx_is_prev == 0) { + stats_append_stack(&offcpu_d->stats_stack, 0, ctx); + } else { + (void)__builtin_memcpy(&offcpu_d->stats_stack, &offcpu_enter->stats_stack, sizeof(struct stats_stack_elem)); + } +} + +static __always_inline trace_event_data_t *create_offcpu_event(offcpu_m_enter_t *offcpu_enter, struct task_struct *task, + void *ctx, int ctx_is_prev) +{ + trace_event_data_t *evt_data; + + evt_data = new_trace_event(); + if (!evt_data) { + return NULL; + } + init_offcpu_event_common(evt_data, task); + init_offcpu_data(&evt_data->offcpu_d, offcpu_enter, ctx, ctx_is_prev); + + return evt_data; +} + +static __always_inline void emit_incomming_offcpu_event(offcpu_m_enter_t *offcpu_enter, struct task_struct *task, + void *ctx, int ctx_is_prev) +{ + trace_event_data_t *evt_data = create_offcpu_event(offcpu_enter, task, ctx, ctx_is_prev); + void *cur_event_map; + + if (!evt_data) { + return; + } + cur_event_map = bpf_get_current_event_map(); + if (cur_event_map) { + bpfbuf_output(ctx, cur_event_map, evt_data, sizeof(trace_event_data_t)); + } +} + +static __always_inline void stash_incomming_offcpu_event(offcpu_m_enter_t *offcpu_enter, struct task_struct *task, + void *ctx, int ctx_is_prev) +{ + trace_event_data_t *evt_data = create_offcpu_event(offcpu_enter, task, ctx, ctx_is_prev); + + if (!evt_data) { + return; + } + bpf_map_update_elem(&offcpu_stash_map, &offcpu_enter->pid, evt_data, BPF_ANY); +} + +static __always_inline void merge_incomming_offcpu_event(trace_event_data_t *evt_data, offcpu_m_enter_t *offcpu_enter) +{ + evt_data->offcpu_d.end_time = offcpu_enter->end_time; + evt_data->offcpu_d.duration += offcpu_enter->end_time - offcpu_enter->start_time; + evt_data->offcpu_d.count++; +} + +static __always_inline void emit_event_stashed(trace_event_data_t *evt_data, void *ctx) +{ + void *cur_event_map; + + cur_event_map = bpf_get_current_event_map(); + if (cur_event_map) { + bpfbuf_output(ctx, cur_event_map, evt_data, sizeof(trace_event_data_t)); + } +} + +#define CAN_EMIT(aggr_dur, stime, etime) ((etime) >= ((stime) + (aggr_dur)) ? 1 : 0) +static __always_inline void process_offcpu_event(offcpu_m_enter_t *offcpu_enter, struct task_struct *task, + void *ctx, u64 min_aggr_dur, int ctx_is_prev) +{ + trace_event_data_t *evt_data; + u32 pid; + u64 aggr_dur = min_aggr_dur == 0 ? DFT_AGGR_DURATION : min_aggr_dur; + + pid = BPF_CORE_READ(task, pid); + evt_data = (trace_event_data_t *)bpf_map_lookup_elem(&offcpu_stash_map, &pid); + + if (evt_data != (void *)0) { + if (CAN_EMIT(aggr_dur, evt_data->offcpu_d.start_time, offcpu_enter->end_time)) { + emit_event_stashed(evt_data, ctx); + bpf_map_delete_elem(&offcpu_stash_map, &pid); + } else { + merge_incomming_offcpu_event(evt_data, offcpu_enter); + return; + } + } + + if (CAN_EMIT(aggr_dur, offcpu_enter->start_time, offcpu_enter->end_time)) { + emit_incomming_offcpu_event(offcpu_enter, task, ctx, ctx_is_prev); + } else { + stash_incomming_offcpu_event(offcpu_enter, task, ctx, ctx_is_prev); + } +} + +static __always_inline offcpu_m_enter_t *get_offcpu_enter(struct task_struct *task) +{ + u32 pid, tgid; + pid = BPF_CORE_READ(task, pid); + tgid = BPF_CORE_READ(task, tgid); + + offcpu_m_enter_t *offcpu_enter; + offcpu_enter = (offcpu_m_enter_t *)bpf_map_lookup_elem(&offcpu_enter_map, &pid); + if (offcpu_enter == (void *)0) { + offcpu_m_enter_t offcpu_enter_tmp; + if (!is_proc_enabled(tgid) || !is_thrd_enabled(pid, tgid)) { + return 0; + } + + __builtin_memset(&offcpu_enter_tmp, 0, sizeof(offcpu_enter_tmp)); + offcpu_enter_tmp.pid = pid; + (void)bpf_map_update_elem(&offcpu_enter_map, &offcpu_enter_tmp.pid, &offcpu_enter_tmp, BPF_ANY); + offcpu_enter = (offcpu_m_enter_t *)bpf_map_lookup_elem(&offcpu_enter_map, &pid); + } + + return offcpu_enter; +} + +static __always_inline void process_oncpu(struct task_struct *task, void *ctx, int ctx_is_prev) +{ + offcpu_m_enter_t *offcpu_enter; + trace_setting_t *setting; + + offcpu_enter = get_offcpu_enter(task); + if (offcpu_enter == (void *)0) { + return; + } + + setting = get_trace_setting(); + if (setting == (void *)0) { + return; + } + + offcpu_enter->end_time = bpf_ktime_get_ns(); + if (offcpu_enter->start_time == 0) { + // This means that the start time of the offcpu event is before the probe is started. + // Therefore, we set a fake start time because stack data needs to be reported. + offcpu_enter->start_time = offcpu_enter->end_time - DFT_AGGR_DURATION; + return; + } + + if (offcpu_enter->end_time < offcpu_enter->start_time + setting->min_exec_dur) { + // offcpu time is too short + return; + } + + process_offcpu_event(offcpu_enter, task, ctx, setting->min_aggr_dur, ctx_is_prev); +} + +static __always_inline void process_offcpu(struct task_struct *task, int ctx_is_prev, void *ctx) +{ + offcpu_m_enter_t *offcpu_enter; + + offcpu_enter = get_offcpu_enter(task); + if (offcpu_enter == (void *)0) { + return; + } + + offcpu_enter->start_time = bpf_ktime_get_ns(); + + if (ctx_is_prev == 1 && offcpu_enter != (void *)0) { + __builtin_memset(&offcpu_enter->stats_stack, 0, sizeof(struct stats_stack_elem)); + stats_append_stack(&offcpu_enter->stats_stack, 0, ctx); + } +} + +KRAWTRACE(sched_switch, bpf_raw_tracepoint_args) +{ + int ctx_is_prev = 1; // ctx is prev + struct task_struct *prev = (struct task_struct *)ctx->args[1]; + struct task_struct *current = (struct task_struct *)ctx->args[2]; + process_offcpu(prev, ctx_is_prev, (void *)ctx); + process_oncpu(current, (void *)ctx, ctx_is_prev); + + return 0; +} + +KPROBE(finish_task_switch, pt_regs) +{ + int ctx_is_prev = 0; // ctx is current + struct task_struct *prev = (struct task_struct *)PT_REGS_PARM1(ctx); + struct task_struct *current = (struct task_struct *)bpf_get_current_task(); + process_offcpu(prev, ctx_is_prev, (void *)ctx); + process_oncpu(current, (void *)ctx, ctx_is_prev); + + return 0; +} + +char g_license[] SEC("license") = "Dual BSD/GPL"; \ No newline at end of file diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/oncpu.bpf.c b/src/probes/extends/ebpf.probe/src/tprofilingprobe/oncpu.bpf.c index 3e5c71cc457776d9471c5ffa5b850a4c1a6b76ed..c8b6a6cd4d884a5d62f63a0022a51cf7a20395c2 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/oncpu.bpf.c +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/oncpu.bpf.c @@ -40,13 +40,6 @@ struct { __uint(max_entries, MAX_SIZE_OF_THREAD); } oncpu_stash_map SEC(".maps"); -struct { - __uint(type, BPF_MAP_TYPE_HASH); - __uint(key_size, sizeof(u32)); - __uint(value_size, sizeof(trace_event_data_t)); - __uint(max_entries, MAX_SIZE_OF_THREAD); -} offcpu_stash_map SEC(".maps"); - static __always_inline void init_oncpu_event_common(trace_event_data_t *evt_data, struct task_struct *task) { @@ -56,15 +49,6 @@ static __always_inline void init_oncpu_event_common(trace_event_data_t *evt_data bpf_core_read_str(evt_data->comm, sizeof(evt_data->comm), &task->comm); } -static __always_inline void init_offcpu_event_common(trace_event_data_t *evt_data, - struct task_struct *task) -{ - evt_data->type = EVT_TYPE_OFFCPU; - evt_data->pid = BPF_CORE_READ(task, pid); - evt_data->tgid = BPF_CORE_READ(task, tgid); - bpf_core_read_str(evt_data->comm, sizeof(evt_data->comm), &task->comm); -} - static __always_inline void init_oncpu_data(oncpu_data_t *oncpu_d, oncpu_m_enter_t *oncpu_enter) { oncpu_d->start_time = oncpu_enter->start_time; @@ -73,17 +57,6 @@ static __always_inline void init_oncpu_data(oncpu_data_t *oncpu_d, oncpu_m_enter oncpu_d->count = 1; } -static __always_inline void init_offcpu_data(offcpu_data_t *offcpu_d, oncpu_m_enter_t *oncpu_enter, void *ctx) -{ - offcpu_d->start_time = oncpu_enter->end_time; - offcpu_d->end_time = oncpu_enter->start_time; - offcpu_d->duration = oncpu_enter->start_time - oncpu_enter->end_time; - offcpu_d->count = 1; - __builtin_memset(&offcpu_d->stats_stack, 0, sizeof(struct stats_stack_elem)); - stats_append_stack(&offcpu_d->stats_stack, 0, ctx); - -} - static __always_inline trace_event_data_t *create_oncpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task) { trace_event_data_t *evt_data; @@ -98,21 +71,6 @@ static __always_inline trace_event_data_t *create_oncpu_event(oncpu_m_enter_t *o return evt_data; } -static __always_inline trace_event_data_t *create_offcpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, - void *ctx) -{ - trace_event_data_t *evt_data; - - evt_data = new_trace_event(); - if (!evt_data) { - return NULL; - } - init_offcpu_event_common(evt_data, task); - init_offcpu_data(&evt_data->offcpu_d, oncpu_enter, ctx); - - return evt_data; -} - static __always_inline void emit_incomming_oncpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, void *ctx) { @@ -128,22 +86,7 @@ static __always_inline void emit_incomming_oncpu_event(oncpu_m_enter_t *oncpu_en } } -static __always_inline void emit_incomming_offcpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, - void *ctx) -{ - trace_event_data_t *evt_data = create_offcpu_event(oncpu_enter, task, ctx); - void *cur_event_map; - - if (!evt_data) { - return; - } - cur_event_map = bpf_get_current_event_map(); - if (cur_event_map) { - bpfbuf_output(ctx, cur_event_map, evt_data, sizeof(trace_event_data_t)); - } -} - -static __always_inline void emit_event_stashed(trace_event_data_t *evt_data, void *ctx) +static __always_inline void emit_oncpu_event_stashed(trace_event_data_t *evt_data, void *ctx) { void *cur_event_map; @@ -163,17 +106,6 @@ static __always_inline void stash_incomming_oncpu_event(oncpu_m_enter_t *oncpu_e bpf_map_update_elem(&oncpu_stash_map, &oncpu_enter->pid, evt_data, BPF_ANY); } -static __always_inline void stash_incomming_offcpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, - void *ctx) -{ - trace_event_data_t *evt_data = create_offcpu_event(oncpu_enter, task, ctx); - - if (!evt_data) { - return; - } - bpf_map_update_elem(&offcpu_stash_map, &oncpu_enter->pid, evt_data, BPF_ANY); -} - static __always_inline void merge_incomming_oncpu_event(trace_event_data_t *evt_data, oncpu_m_enter_t *oncpu_enter) { evt_data->oncpu_d.end_time = oncpu_enter->end_time; @@ -181,24 +113,19 @@ static __always_inline void merge_incomming_oncpu_event(trace_event_data_t *evt_ evt_data->oncpu_d.count++; } -static __always_inline void merge_incomming_offcpu_event(trace_event_data_t *evt_data, oncpu_m_enter_t *oncpu_enter) -{ - evt_data->offcpu_d.end_time = oncpu_enter->start_time; - evt_data->offcpu_d.duration += oncpu_enter->start_time - oncpu_enter->end_time; - evt_data->offcpu_d.count++; -} - +#define CAN_EMIT(aggr_dur, stime, etime) ((etime) >= ((stime) + (aggr_dur)) ? 1 : 0) static __always_inline void process_oncpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, - void *ctx) + void *ctx, u64 min_aggr_dur) { trace_event_data_t *evt_data; u32 pid; + u64 aggr_dur = min_aggr_dur == 0 ? DFT_AGGR_DURATION : min_aggr_dur; pid = BPF_CORE_READ(task, pid); evt_data = (trace_event_data_t *)bpf_map_lookup_elem(&oncpu_stash_map, &pid); if (evt_data == (void *)0) { - if (can_emit(oncpu_enter->start_time, oncpu_enter->end_time)) { + if (CAN_EMIT(aggr_dur, oncpu_enter->start_time, oncpu_enter->end_time)) { emit_incomming_oncpu_event(oncpu_enter, task, ctx); } else { stash_incomming_oncpu_event(oncpu_enter, task); @@ -206,11 +133,11 @@ static __always_inline void process_oncpu_event(oncpu_m_enter_t *oncpu_enter, st return; } - if (can_emit(evt_data->oncpu_d.start_time, oncpu_enter->end_time)) { - emit_event_stashed(evt_data, ctx); + if (CAN_EMIT(aggr_dur, evt_data->oncpu_d.start_time, oncpu_enter->end_time)) { + emit_oncpu_event_stashed(evt_data, ctx); bpf_map_delete_elem(&oncpu_stash_map, &pid); - if (can_emit(oncpu_enter->start_time, oncpu_enter->end_time)) { + if (CAN_EMIT(aggr_dur, oncpu_enter->start_time, oncpu_enter->end_time)) { emit_incomming_oncpu_event(oncpu_enter, task, ctx); } else { stash_incomming_oncpu_event(oncpu_enter, task); @@ -220,95 +147,33 @@ static __always_inline void process_oncpu_event(oncpu_m_enter_t *oncpu_enter, st } } -static __always_inline void process_offcpu_event(oncpu_m_enter_t *oncpu_enter, struct task_struct *task, - void *ctx) -{ - trace_event_data_t *evt_data; - u32 pid; - - pid = BPF_CORE_READ(task, pid); - evt_data = (trace_event_data_t *)bpf_map_lookup_elem(&offcpu_stash_map, &pid); - - if (evt_data == (void *)0) { - if (can_emit(oncpu_enter->end_time, oncpu_enter->start_time)) { - emit_incomming_offcpu_event(oncpu_enter, task, ctx); - } else { - stash_incomming_offcpu_event(oncpu_enter, task, ctx); - } - return; - } - - if (can_emit(evt_data->offcpu_d.start_time, oncpu_enter->start_time)) { - emit_event_stashed(evt_data, ctx); - bpf_map_delete_elem(&offcpu_stash_map, &pid); - - if (can_emit(oncpu_enter->end_time, oncpu_enter->start_time)) { - emit_incomming_offcpu_event(oncpu_enter, task, ctx); - } else { - stash_incomming_offcpu_event(oncpu_enter, task, ctx); - } - } else { - merge_incomming_offcpu_event(evt_data, oncpu_enter); - } -} - -static __always_inline oncpu_m_enter_t *get_oncpu_enter(struct task_struct *task) +static __always_inline void process_oncpu(struct task_struct *task) { u32 pid, tgid; + oncpu_m_enter_t oncpu_enter; + pid = BPF_CORE_READ(task, pid); tgid = BPF_CORE_READ(task, tgid); - - oncpu_m_enter_t *oncpu_enter; - oncpu_enter = (oncpu_m_enter_t *)bpf_map_lookup_elem(&oncpu_enter_map, &pid); - if (oncpu_enter == (void *)0) { - oncpu_m_enter_t oncpu_enter_tmp; - if (!is_proc_enabled(tgid) || !is_thrd_enabled(pid, tgid)) { - return 0; - } - - __builtin_memset(&oncpu_enter_tmp, 0, sizeof(oncpu_enter_tmp)); - oncpu_enter_tmp.pid = pid; - (void)bpf_map_update_elem(&oncpu_enter_map, &oncpu_enter_tmp.pid, &oncpu_enter_tmp, BPF_ANY); - oncpu_enter = (oncpu_m_enter_t *)bpf_map_lookup_elem(&oncpu_enter_map, &pid); - } - - return oncpu_enter; -} - -static __always_inline void process_oncpu(struct task_struct *task, void *ctx) -{ - oncpu_m_enter_t *oncpu_enter; - trace_setting_t *setting; - - oncpu_enter = get_oncpu_enter(task); - if (oncpu_enter == (void *)0) { - return; - } - - setting = get_trace_setting(); - if (setting == (void *)0) { + if (!is_proc_enabled(tgid) || !is_thrd_enabled(pid, tgid)) { return; } - oncpu_enter->start_time = bpf_ktime_get_ns(); - if (oncpu_enter->start_time < oncpu_enter->end_time + setting->min_exec_dur) { - // offcpu time is too short - return; - } - - process_offcpu_event(oncpu_enter, task, ctx); + __builtin_memset(&oncpu_enter, 0, sizeof(oncpu_enter)); + oncpu_enter.pid = pid; + oncpu_enter.start_time = bpf_ktime_get_ns(); + (void)bpf_map_update_elem(&oncpu_enter_map, &oncpu_enter.pid, &oncpu_enter, BPF_ANY); } static __always_inline void process_offcpu(struct task_struct *task, void *ctx) { + u32 pid = BPF_CORE_READ(task, pid); oncpu_m_enter_t *oncpu_enter; trace_setting_t *setting; - oncpu_enter = get_oncpu_enter(task); + oncpu_enter = (oncpu_m_enter_t *)bpf_map_lookup_elem(&oncpu_enter_map, &pid); if (oncpu_enter == (void *)0) { return; } - setting = get_trace_setting(); if (setting == (void *)0) { return; @@ -316,11 +181,13 @@ static __always_inline void process_offcpu(struct task_struct *task, void *ctx) oncpu_enter->end_time = bpf_ktime_get_ns(); if (oncpu_enter->end_time < oncpu_enter->start_time + setting->min_exec_dur) { - // oncpu time is too short + (void)bpf_map_delete_elem(&oncpu_enter_map, &pid); return; } - process_oncpu_event(oncpu_enter, task, ctx); + process_oncpu_event(oncpu_enter, task, ctx, setting->min_aggr_dur); + + (void)bpf_map_delete_elem(&oncpu_enter_map, &pid); } KRAWTRACE(sched_switch, bpf_raw_tracepoint_args) @@ -328,7 +195,7 @@ KRAWTRACE(sched_switch, bpf_raw_tracepoint_args) struct task_struct *prev = (struct task_struct *)ctx->args[1]; struct task_struct *current = (struct task_struct *)ctx->args[2]; process_offcpu(prev, (void *)ctx); - process_oncpu(current, (void *)ctx); + process_oncpu(current); return 0; } @@ -338,7 +205,7 @@ KPROBE(finish_task_switch, pt_regs) struct task_struct *prev = (struct task_struct *)PT_REGS_PARM1(ctx); struct task_struct *current = (struct task_struct *)bpf_get_current_task(); process_offcpu(prev, (void *)ctx); - process_oncpu(current, (void *)ctx); + process_oncpu(current); return 0; } diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.c b/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.c index 19844e6ee4236c5e1c912c49c81c6e935faa3820..b58b8f1c099aa5379e1b7c9043fa9e96be3bac72 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.c +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.c @@ -2070,7 +2070,7 @@ int gen_oom_trace_file(char *file_path, int size) struct tm *tm = localtime(&now); file_path[0] = 0; - char timestamp[TASK_COMM_LEN]; + char timestamp[MAX_PROCESS_NAME_LEN]; sz = strftime(timestamp, sizeof(timestamp), "oom-trace-%Y%m%d%H%M.json", tm); if (sz == 0) { TP_ERROR("Failed to set oom trace file path\n"); diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.h b/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.h index bf2688250329140926b051075289b44d1209c4a2..2347dc45f64124101c4366354af63f67e4443925 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.h +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/profiling_event.h @@ -27,7 +27,7 @@ #define PROFILE_EVT_TYPE_LOCK "lock" #define PROFILE_EVT_TYPE_IO "io" #define PROFILE_EVT_TYPE_ONCPU "oncpu" -#define PROFILE_EVT_TYPE_OFFCPU "offcpu" +#define PROFILE_EVT_TYPE_OFFCPU "offcpu" #define PROFILE_EVT_TYPE_PYGC "python_gc" #define PROFILE_EVT_TYPE_PTHRD_SYNC "pthread_sync" #define PROFILE_EVT_TYPE_SAMPLE "sample" diff --git a/src/probes/extends/ebpf.probe/src/tprofilingprobe/tprofiling.h b/src/probes/extends/ebpf.probe/src/tprofilingprobe/tprofiling.h index 95acb260e81515caab5786e59a38eee63938d3a9..1e696cd17dfef6ca6b2334487e8587b8b7ace179 100644 --- a/src/probes/extends/ebpf.probe/src/tprofilingprobe/tprofiling.h +++ b/src/probes/extends/ebpf.probe/src/tprofilingprobe/tprofiling.h @@ -24,11 +24,13 @@ | PROBE_RANGE_TPROFILING_SYSCALL_SCHED | PROBE_RANGE_TPROFILING_SYSCALL_LOCK) #define TPROFILING_EBPF_PROBE_ALL \ (u32)(PROBE_RANGE_TPROFILING_ONCPU | TPROFILING_PROBE_SYSCALL_ALL | PROBE_RANGE_TPROFILING_PYTHON_GC \ - | PROBE_RANGE_TPROFILING_PTHREAD_SYNC | PROBE_RANGE_TPROFILING_ONCPU_SAMPLE | PROBE_RANGE_TPROFILING_MEM_GLIBC) + | PROBE_RANGE_TPROFILING_PTHREAD_SYNC | PROBE_RANGE_TPROFILING_ONCPU_SAMPLE | PROBE_RANGE_TPROFILING_MEM_GLIBC \ + | PROBE_RANGE_TPROFILING_OFFCPU) #define TPROFILING_PROBE_ALL (u32)(TPROFILING_EBPF_PROBE_ALL | PROBE_RANGE_TPROFILING_MEM_USAGE) #define TPROFILING_PROBES_WITH_STACK (u32)(TPROFILING_PROBE_SYSCALL_ALL | PROBE_RANGE_TPROFILING_PYTHON_GC \ - | PROBE_RANGE_TPROFILING_PTHREAD_SYNC | PROBE_RANGE_TPROFILING_ONCPU_SAMPLE | PROBE_RANGE_TPROFILING_MEM_GLIBC) + | PROBE_RANGE_TPROFILING_PTHREAD_SYNC | PROBE_RANGE_TPROFILING_ONCPU_SAMPLE | PROBE_RANGE_TPROFILING_MEM_GLIBC \ + | PROBE_RANGE_TPROFILING_OFFCPU) #define EVENT_NAME_LEN 16 #define MAX_SIZE_OF_PROC 128 @@ -154,6 +156,13 @@ typedef struct { int count; } common_data_t; +typedef struct { + int pid; + __u64 start_time; + __u64 end_time; + struct stats_stack_elem stats_stack; +} offcpu_m_enter_t; + typedef common_m_enter_t oncpu_m_enter_t; typedef common_data_t oncpu_data_t;