diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/output_proc.h b/src/probes/extends/ebpf.probe/src/taskprobe/output_proc.h index 14ee23e13cf2e9c228f079d20dd47e7183500820..e0f9e2dc07192af8b31dd9b614791c47a33680a4 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/output_proc.h +++ b/src/probes/extends/ebpf.probe/src/taskprobe/output_proc.h @@ -75,6 +75,8 @@ static __always_inline __maybe_unused char is_proc_tmout(struct proc_data_s *pro IS_PROC_TMOUT(stats_ts, ts, period, page, tmout); } else if (flags & TASK_PROBE_DNS_OP) { IS_PROC_TMOUT(stats_ts, ts, period, dns, tmout); + } else if (flags & TASK_PROBE_IO) { + IS_PROC_TMOUT(stats_ts, ts, period, io, tmout); } else { tmout = 0; } @@ -101,7 +103,6 @@ static __always_inline __maybe_unused void reset_proc_stats(struct proc_data_s * proc->syscall.ns_futex = 0; proc->syscall.ns_epoll_wait = 0; proc->syscall.ns_epoll_pwait = 0; - } else if (flags & TASK_PROBE_FORK_SYSCALL) { proc->syscall.ns_fork = 0; proc->syscall.ns_vfork = 0; @@ -116,6 +117,8 @@ static __always_inline __maybe_unused void reset_proc_stats(struct proc_data_s * __builtin_memset(&(proc->page_op), 0x0, sizeof(proc->page_op)); } else if (flags & TASK_PROBE_DNS_OP) { __builtin_memset(&(proc->dns_op), 0x0, sizeof(proc->dns_op)); + } else if (flags & TASK_PROBE_IO) { + __builtin_memset(&(proc->proc_io), 0x0, sizeof(proc->proc_io)); } } diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/output_task.h b/src/probes/extends/ebpf.probe/src/taskprobe/output_task.h index 16264c2683de078708935b8749747c373cdc030c..49730230ce0ebfc41e323353c3a48d4d013ab3a0 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/output_task.h +++ b/src/probes/extends/ebpf.probe/src/taskprobe/output_task.h @@ -57,9 +57,7 @@ static __always_inline __maybe_unused char is_task_tmout(struct task_data *task, struct task_ts_s *stats_ts = &(task->stats_ts); - if (flags & TASK_PROBE_THREAD_IO) { - IS_TASK_TMOUT(stats_ts, ts, period, io, tmout); - } else if (flags & TASK_PROBE_THREAD_CPU) { + if (flags & TASK_PROBE_THREAD_CPU) { IS_TASK_TMOUT(stats_ts, ts, period, cpu, tmout); } else { tmout = 0; @@ -78,9 +76,7 @@ static __always_inline __maybe_unused void report_task(void *ctx, struct task_da (void)bpf_perf_event_output(ctx, &g_task_output, BPF_F_CURRENT_CPU, val, sizeof(struct task_data)); val->flags = 0; - if (flags & TASK_PROBE_THREAD_IO) { - __builtin_memset(&(val->io), 0x0, sizeof(val->io)); - } else if (flags & TASK_PROBE_THREAD_CPU) { + if (flags & TASK_PROBE_THREAD_CPU) { __builtin_memset(&(val->cpu), 0x0, sizeof(val->cpu)); } } diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc.h b/src/probes/extends/ebpf.probe/src/taskprobe/proc.h index c5f062a226b442aff2115f95851d20465110e9d4..afd09368d30c1da932f8b0360cf4dc13de6db76f 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/proc.h +++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc.h @@ -81,6 +81,19 @@ struct proc_ts_s { u64 ts_page; u64 ts_dns; + + u64 ts_io; +}; + +struct proc_io_s { + u32 less_4k_io_read; + u32 less_4k_io_write; + u32 greater_4k_io_read; + u32 greater_4k_io_write; + u32 bio_latency; + u32 bio_err_count; + u32 hang_count; + u64 iowait_us; }; struct proc_data_s { @@ -95,6 +108,7 @@ struct proc_data_s { struct fs_op_s op_tmpfs; struct page_op_s page_op; struct dns_op_s dns_op; + struct proc_io_s proc_io; }; #endif diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc.meta b/src/probes/extends/ebpf.probe/src/taskprobe/proc.meta index af70633069849d0ad522732982c6d9746c3b6acd..efb8935e1b1c37e3823695345ba7d6467706ef0b 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/proc.meta +++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc.meta @@ -330,5 +330,62 @@ measurements: name: "ns_gethostname", } ) + }, + { + table_name: "proc_io", + entity_name: "proc", + fields: + ( + { + description: "ID of the process", + type: "key", + name: "tgid", + }, + { + description: "comm of the process", + type: "label", + name: "comm", + }, + { + description: "Number of small I/O (less than 4 KB) read operations at the BIO layer.", + type: "gauge", + name: "less_4k_io_read", + }, + { + description: "Number of small I/O (less than 4 KB) write operations at the BIO layer.", + type: "gauge", + name: "less_4k_io_write", + }, + { + description: "Number of big I/O (greater than 4 KB) read operations at the BIO layer.", + type: "gauge", + name: "greater_4k_io_read", + }, + { + description: "Number of big I/O (greater than 4 KB) write operations at the BIO layer.", + type: "gauge", + name: "greater_4k_io_write", + }, + { + description: "I/O operation delay at the BIO layer (unit: us).", + type: "gauge", + name: "bio_latency", + }, + { + description: "Number of I/O operation failures at the BIO layer.", + type: "gauge", + name: "bio_err_count", + }, + { + description: "Number of process hang times.", + type: "gauge", + name: "hang_count", + }, + { + description: "Process IO_wait time (unit: us).", + type: "gauge", + name: "iowait_us", + } + ) } ) diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/thread_io.bpf.c b/src/probes/extends/ebpf.probe/src/taskprobe/proc_io.bpf.c similarity index 46% rename from src/probes/extends/ebpf.probe/src/taskprobe/thread_io.bpf.c rename to src/probes/extends/ebpf.probe/src/taskprobe/proc_io.bpf.c index 7ce561d43e3a1105ed40709a5e8eab2ed3eea268..1fe62b302e8c89a2fb4e7418c4beec6031f850d8 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/thread_io.bpf.c +++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc_io.bpf.c @@ -18,19 +18,26 @@ #define BPF_PROG_KERN #include "bpf.h" #include "task.h" -#include "task_map.h" -#include "output_task.h" +#include "proc.h" +#include "proc_map.h" +#include "output_proc.h" #define REQ_OP_BITS 8 #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define BIG_BIO_SIZE (4 * 1024) char g_linsence[] SEC("license") = "GPL"; +struct proc_bio_stats_s { + int proc_id; + u64 start_ts; +}; + #define __BIO_MAX 1000 struct bpf_map_def SEC("maps") bio_map = { .type = BPF_MAP_TYPE_HASH, .key_size = sizeof(struct bio*), - .value_size = sizeof(int), // pid + .value_size = sizeof(struct proc_bio_stats_s), .max_entries = __BIO_MAX, }; @@ -47,9 +54,12 @@ static __always_inline char is_write_bio(struct bio *bio) ((op & REQ_OP_MASK) == REQ_OP_FLUSH); } -static __always_inline int store_bio(struct bio *bio, int pid) +static __always_inline int store_bio(struct bio *bio, int proc_id) { - return bpf_map_update_elem(&bio_map, &bio, &pid, BPF_ANY); + struct proc_bio_stats_s bio_stats = {0}; + bio_stats.proc_id = proc_id; + bio_stats.start_ts = bpf_ktime_get_ns(); + return bpf_map_update_elem(&bio_map, &bio, &bio_stats, BPF_ANY); } static __always_inline int is_err_bio(struct bio *bio) @@ -59,81 +69,105 @@ static __always_inline int is_err_bio(struct bio *bio) static __always_inline void end_bio(void *ctx, struct bio *bio) { - int *pid = bpf_map_lookup_elem(&bio_map, &bio); - if (pid == NULL) { + struct proc_bio_stats_s *bio_stats = bpf_map_lookup_elem(&bio_map, &bio); + if (bio_stats == NULL) { + return; + } + struct proc_data_s *proc; + + proc = get_proc_entry(bio_stats->proc_id); + if (proc == NULL) { + (void)bpf_map_delete_elem(&bio_map, &bio); return; } if (is_err_bio(bio)) { - struct task_data* data = get_task(*pid); - if (data) { - __sync_fetch_and_add(&(data->io.bio_err_count), 1); - report_task(ctx, data, TASK_PROBE_THREAD_IO); + __sync_fetch_and_add(&(proc->proc_io.bio_err_count), 1); + report_proc(ctx, proc, TASK_PROBE_IO); + } else { + u64 end_ts = bpf_ktime_get_ns(); + if (end_ts > bio_stats->start_ts) { + u64 delta = end_ts - bio_stats->start_ts; + proc->proc_io.bio_latency = max(proc->proc_io.bio_latency, delta); + report_proc(ctx, proc, TASK_PROBE_IO); } - return; } (void)bpf_map_delete_elem(&bio_map, &bio); return; } - -KPROBE(submit_bio, pt_regs) +KRAWTRACE(block_bio_queue, bpf_raw_tracepoint_args) { - struct bio *bio = (struct bio *)PT_REGS_PARM1(ctx); + u32 bio_size; + struct bio *bio = (struct bio*)ctx->args[1]; + u32 proc_id = bpf_get_current_pid_tgid() >> INT_LEN; + struct proc_data_s *proc; - int pid = (int)bpf_get_current_pid_tgid(); - - if (!user_mode(ctx)) { - return; - } - - struct task_data *data = get_task(pid); - if (data == NULL) { + proc = get_proc_entry(proc_id); + if (proc == NULL) { return; } if (is_read_bio(bio)) { - data->io.bio_bytes_read += _(bio->bi_iter.bi_size); - report_task(ctx, data, TASK_PROBE_THREAD_IO); + bio_size = _(bio->bi_iter.bi_size); + if (bio_size > BIG_BIO_SIZE) { + __sync_fetch_and_add(&(proc->proc_io.greater_4k_io_read), 1); + } else { + __sync_fetch_and_add(&(proc->proc_io.less_4k_io_read), 1); + } + report_proc(ctx, proc, TASK_PROBE_IO); - store_bio(bio, pid); + store_bio(bio, proc_id); return; } if (is_write_bio(bio)) { - data->io.bio_bytes_write += _(bio->bi_iter.bi_size); - report_task(ctx, data, TASK_PROBE_THREAD_IO); + bio_size = _(bio->bi_iter.bi_size); + if (bio_size > BIG_BIO_SIZE) { + __sync_fetch_and_add(&(proc->proc_io.greater_4k_io_write), 1); + } else { + __sync_fetch_and_add(&(proc->proc_io.less_4k_io_write), 1); + } + report_proc(ctx, proc, TASK_PROBE_IO); - store_bio(bio, pid); + store_bio(bio, proc_id); return; } } -KPROBE(bio_endio, pt_regs) +KRAWTRACE(block_bio_complete, bpf_raw_tracepoint_args) { - struct bio *bio = (struct bio *)PT_REGS_PARM1(ctx); + struct bio *bio = (struct bio*)ctx->args[1]; end_bio(ctx, bio); } -KRAWTRACE(sched_stat_iowait, bpf_raw_tracepoint_args) +KRAWTRACE(sched_process_hang, bpf_raw_tracepoint_args) { + struct proc_data_s *proc; struct task_struct* task = (struct task_struct*)ctx->args[0]; - u64 delta = (u64)ctx->args[1]; - - struct task_data *data = get_task((int)_(task->pid)); - if (data) { - __sync_fetch_and_add(&(data->io.iowait_us), delta); - report_task(ctx, data, TASK_PROBE_THREAD_IO); + u32 proc_id = (u32)_(task->tgid); + proc = get_proc_entry(proc_id); + if (proc == NULL) { + return; } + __sync_fetch_and_add(&(proc->proc_io.hang_count), 1); + report_proc(ctx, proc, TASK_PROBE_IO); } -KRAWTRACE(sched_process_hang, bpf_raw_tracepoint_args) +KRAWTRACE(sched_stat_iowait, bpf_raw_tracepoint_args) { + struct proc_data_s *proc; struct task_struct* task = (struct task_struct*)ctx->args[0]; - struct task_data *data = get_task((int)_(task->pid)); - if (data) { - __sync_fetch_and_add(&(data->io.hang_count), 1); - report_task(ctx, data, TASK_PROBE_THREAD_IO); + u64 delta = (u64)ctx->args[1]; + u32 proc_id = (u32)_(task->tgid); + + proc = get_proc_entry(proc_id); + if (proc == NULL) { + return; } + + __sync_fetch_and_add(&(proc->proc_io.iowait_us), delta); + report_proc(ctx, proc, TASK_PROBE_IO); } + diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c index a8a442503ac3149b2a8dc059d86aa609698c6188..b3c970555a83ccc20e95e28a2f48314e051a5b12 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c +++ b/src/probes/extends/ebpf.probe/src/taskprobe/proc_probe.c @@ -41,6 +41,7 @@ #include "overlay.skel.h" #include "tmpfs.skel.h" #include "page.skel.h" +#include "proc_io.skel.h" #include "bpf_prog.h" #ifdef OO_NAME @@ -63,6 +64,8 @@ static struct probe_params *g_args; #define PROC_TBL_PAGE "proc_page" #define PROC_TBL_DNS "proc_dns" +#define PROC_TBL_IO "proc_io" + #define OVERLAY_MOD "overlay" #define EXT4_MOD "ext4" @@ -76,6 +79,7 @@ static struct probe_params *g_args; static void report_proc_metrics(struct proc_data_s *proc) { char entityId[INT_LEN]; + u64 latency_thr_us = US(g_args->latency_thr); if (g_args->logs == 0) { return; @@ -107,6 +111,39 @@ static void report_proc_metrics(struct proc_data_s *proc) proc->proc_id, proc->dns_op.gethostname_failed); } + + if (proc->proc_io.iowait_us > latency_thr_us) { + report_logs(OO_NAME, + entityId, + "iowait_us", + EVT_SEC_WARN, + "Process(COMM:%s PID:%u) iowait %llu us.", + proc->comm, + proc->proc_id, + proc->proc_io.iowait_us); + } + + if (proc->proc_io.hang_count > 0) { + report_logs(OO_NAME, + entityId, + "hang_count", + EVT_SEC_WARN, + "Process(COMM:%s PID:%u) hang count %u.", + proc->comm, + proc->proc_id, + proc->proc_io.hang_count); + } + + if (proc->proc_io.bio_err_count > 0) { + report_logs(OO_NAME, + entityId, + "bio_err_count", + EVT_SEC_WARN, + "Process(COMM:%s PID:%u) bio error %u.", + proc->comm, + proc->proc_id, + proc->proc_io.bio_err_count); + } } static void output_proc_metrics_syscall(struct proc_data_s *proc) @@ -254,6 +291,27 @@ static void output_proc_metrics_dns(struct proc_data_s *proc) proc->dns_op.gethostname_ns); } +static void output_proc_io_stats(struct proc_data_s *proc) +{ + (void)fprintf(stdout, + "|%s|%u|%s|" + "%u|%u|%u|%u|" + "%u|%u|%u|%llu|\n", + PROC_TBL_IO, + proc->proc_id, + proc->comm, + + proc->proc_io.less_4k_io_read, + proc->proc_io.less_4k_io_write, + proc->proc_io.greater_4k_io_read, + proc->proc_io.greater_4k_io_write, + + proc->proc_io.bio_latency, + proc->proc_io.bio_err_count, + proc->proc_io.hang_count, + proc->proc_io.iowait_us); +} + static void output_proc_metrics(void *ctx, int cpu, void *data, __u32 size) { struct proc_data_s *proc = (struct proc_data_s *)data; @@ -269,9 +327,8 @@ static void output_proc_metrics(void *ctx, int cpu, void *data, __u32 size) output_proc_metrics_syscall_net(proc); } else if (flags & TASK_PROBE_SCHED_SYSCALL) { output_proc_metrics_syscall_sched(proc); - - } else if (flags & TASK_PROBE_FORK_SYSCALL) { - output_proc_metrics_syscall_fork(proc); + } else if (flags & TASK_PROBE_FORK_SYSCALL) { + output_proc_metrics_syscall_fork(proc); } else if (flags & TASK_PROBE_EXT4_OP) { output_proc_metrics_ext4(proc); } else if (flags & TASK_PROBE_OVERLAY_OP) { @@ -282,6 +339,8 @@ static void output_proc_metrics(void *ctx, int cpu, void *data, __u32 size) output_proc_metrics_page(proc); } else if (flags & TASK_PROBE_DNS_OP) { output_proc_metrics_dns(proc); + } else if (flags & TASK_PROBE_IO) { + output_proc_io_stats(proc); } (void)fflush(stdout); @@ -475,11 +534,30 @@ err: return -1; } +static int load_proc_io_prog(struct bpf_prog_s *prog, char is_load) +{ + int ret = 0; + + __LOAD_PROBE(proc_io, err, is_load); + if (is_load) { + prog->skels[prog->num].skel = proc_io_skel; + prog->skels[prog->num].fn = (skel_destroy_fn)proc_io_bpf__destroy; + prog->num++; + + ret = load_proc_create_pb(prog, GET_MAP_FD(proc_io, g_proc_output)); + } + + return ret; +err: + UNLOAD(proc_io); + return -1; +} + struct bpf_prog_s* load_proc_bpf_prog(struct probe_params *args) { struct bpf_prog_s *prog; char is_load_syscall, is_load_syscall_io, is_load_syscall_net; - char is_load_syscall_fork, is_load_syscall_sched; + char is_load_syscall_fork, is_load_syscall_sched, is_load_proc_io; char is_load_overlay, is_load_ext4, is_load_tmpfs, is_load_page; is_load_overlay = is_load_probe(args, TASK_PROBE_OVERLAY_OP) & is_exist_mod(OVERLAY_MOD); @@ -494,6 +572,8 @@ struct bpf_prog_s* load_proc_bpf_prog(struct probe_params *args) is_load_tmpfs = is_load_probe(args, TASK_PROBE_TMPFS_OP); is_load_page = is_load_probe(args, TASK_PROBE_PAGE_OP); + is_load_proc_io = is_load_probe(args, TASK_PROBE_IO); + g_args = args; prog = alloc_bpf_prog(); @@ -537,6 +617,10 @@ struct bpf_prog_s* load_proc_bpf_prog(struct probe_params *args) goto err; } + if (load_proc_io_prog(prog, is_load_proc_io)) { + goto err; + } + return prog; err: diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/task.h b/src/probes/extends/ebpf.probe/src/taskprobe/task.h index cfd1cd210d05106a93f23f4af2ac356182fe9e6f..4f74bd152bcb4ca55fafadafd6c953ac39f4f41e 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/task.h +++ b/src/probes/extends/ebpf.probe/src/taskprobe/task.h @@ -30,14 +30,16 @@ #define TASK_PROBE_TMPFS_OP (u32)(1 << 7) #define TASK_PROBE_PAGE_OP (u32)(1 << 8) #define TASK_PROBE_DNS_OP (u32)(1 << 9) -#define TASK_PROBE_THREAD_IO (u32)(1 << 10) +#define TASK_PROBE_IO (u32)(1 << 10) #define TASK_PROBE_THREAD_CPU (u32)(1 << 11) +#define US(ms) ((u64)(ms) * 1000) + #define TASK_PROBE_ALL (u32)(TASK_PROBE_SYSCALL | TASK_PROBE_IO_SYSCALL \ | TASK_PROBE_NET_SYSCALL | TASK_PROBE_SCHED_SYSCALL \ | TASK_PROBE_FORK_SYSCALL | TASK_PROBE_EXT4_OP | TASK_PROBE_OVERLAY_OP \ | TASK_PROBE_TMPFS_OP | TASK_PROBE_PAGE_OP | TASK_PROBE_DNS_OP \ - | TASK_PROBE_THREAD_IO | TASK_PROBE_THREAD_CPU) + | TASK_PROBE_IO | TASK_PROBE_THREAD_CPU) static __always_inline __maybe_unused char is_load_probe(struct probe_params *args, u32 probe) { diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/thread.h b/src/probes/extends/ebpf.probe/src/taskprobe/thread.h index 315f97bfec6499c754497b9a99cead30d88eeda3..89f6809e10b186b75a3f9df6bbfd8360c56c3ee3 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/thread.h +++ b/src/probes/extends/ebpf.probe/src/taskprobe/thread.h @@ -17,8 +17,6 @@ #pragma once -#define SHARE_MAP_TASK_MAX_ENTRIES (10 * 1024) - enum task_status_type { TASK_STATUS_ACTIVE = 0, TASK_STATUS_INACTIVE, @@ -26,16 +24,6 @@ enum task_status_type { TASK_STATUS_MAX, }; -struct task_io_data { - __u64 bio_bytes_read; - __u64 bio_bytes_write; - - __u64 iowait_us; - __u32 hang_count; - - __u32 bio_err_count; -}; - struct task_cpu_data { int off_cpu_no; __u64 off_cpu_ns; @@ -56,7 +44,6 @@ struct task_id { }; struct task_ts_s { - u64 ts_io; u64 ts_cpu; }; @@ -64,7 +51,6 @@ struct task_data { u32 flags; struct task_ts_s stats_ts; struct task_id id; - struct task_io_data io; struct task_cpu_data cpu; }; diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/thread.meta b/src/probes/extends/ebpf.probe/src/taskprobe/thread.meta index 790f33c91d10658f8f0ea1cc16e6474006a8ebad..fa21056a5ebd004762a7ca5da711630c88f3c660 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/thread.meta +++ b/src/probes/extends/ebpf.probe/src/taskprobe/thread.meta @@ -1,53 +1,6 @@ version = "1.0.0" measurements: ( - { - table_name: "thread_io", - entity_name: "thread", - fields: - ( - { - description: "ID of the thread", - type: "key", - name: "pid", - }, - { - description: "PID id of thd process", - type: "label", - name: "tgid", - }, - { - description: "comm of the process", - type: "label", - name: "comm", - }, - { - description: "Number of read bytes by thead, in BIO phase.", - type: "gauge", - name: "bio_bytes_read", - }, - { - description: "Number of write bytes by thead, in BIO phase.", - type: "gauge", - name: "bio_bytes_write", - }, - { - description: "Thread iowait times, unit is us.", - type: "gauge", - name: "iowait_us", - }, - { - description: "Thead I/O hang count.", - type: "gauge", - name: "hang_count", - }, - { - description: "Thread bio err count", - type: "gauge", - name: "bio_err_count", - } - ) - }, { table_name: "thread_cpu", entity_name: "thread", diff --git a/src/probes/extends/ebpf.probe/src/taskprobe/thread_probe.c b/src/probes/extends/ebpf.probe/src/taskprobe/thread_probe.c index 1a27706bd236a05ee8877a06b296b0e7e6190fc8..0f8aa9fe3b2a341fa45a3714a02edba8daa1e7e8 100644 --- a/src/probes/extends/ebpf.probe/src/taskprobe/thread_probe.c +++ b/src/probes/extends/ebpf.probe/src/taskprobe/thread_probe.c @@ -32,7 +32,6 @@ #include "thread.h" #include "event.h" #include "task.h" -#include "thread_io.skel.h" #include "cpu.skel.h" #include "bpf_prog.h" @@ -40,13 +39,10 @@ #undef OO_NAME #endif #define OO_NAME "thread" -#define THREAD_TBL_IO "thread_io" #define THREAD_TBL_CPU "thread_cpu" static struct probe_params *g_args; -#define US(ms) ((u64)(ms) * 1000) - #define __LOAD_PROBE(probe_name, end, load) \ OPEN(probe_name, end, load); \ MAP_SET_PIN_PATH(probe_name, g_task_output, TASK_OUTPUT_PATH, load); \ @@ -57,7 +53,7 @@ static struct probe_params *g_args; static void report_task_metrics(struct task_data *data) { char entityId[INT_LEN]; - u64 latency_thr_us = US(g_args->latency_thr); + u64 offline_thr_us = US(g_args->offline_thr); if (g_args->logs == 0) { return; @@ -66,7 +62,7 @@ static void report_task_metrics(struct task_data *data) entityId[0] = 0; (void)snprintf(entityId, INT_LEN, "%d", data->id.pid); - if (data->cpu.off_cpu_ns > 0) { + if (data->cpu.off_cpu_ns > offline_thr_us) { report_logs(OO_NAME, entityId, "off_cpu_ns", @@ -78,57 +74,6 @@ static void report_task_metrics(struct task_data *data) data->cpu.preempt_id, data->cpu.off_cpu_ns); } - - if (data->io.iowait_us > latency_thr_us) { - report_logs(OO_NAME, - entityId, - "iowait_us", - EVT_SEC_WARN, - "Process(COMM:%s TID:%d) iowait %llu us.", - data->id.comm, - data->id.pid, - data->io.iowait_us); - } - - if (data->io.hang_count > 0) { - report_logs(OO_NAME, - entityId, - "hang_count", - EVT_SEC_WARN, - "Process(COMM:%s TID:%d) io hang %u.", - data->id.comm, - data->id.pid, - data->io.hang_count); - } - - if (data->io.bio_err_count > 0) { - report_logs(OO_NAME, - entityId, - "bio_err_count", - EVT_SEC_WARN, - "Process(COMM:%s TID:%d) bio error %u.", - data->id.comm, - data->id.pid, - data->io.bio_err_count); - } -} - -static void output_task_metrics_io(struct task_data *task_data) -{ - (void)fprintf(stdout, - "|%s|%d|%d|%s|" - "%llu|%llu|%llu|%u|%u|\n", - THREAD_TBL_IO, - task_data->id.pid, - task_data->id.tgid, - task_data->id.comm, - - task_data->io.bio_bytes_read, - task_data->io.bio_bytes_write, - task_data->io.iowait_us, - task_data->io.hang_count, - task_data->io.bio_err_count); - return; } static void output_task_metrics_cpu(struct task_data *task_data) @@ -153,9 +98,7 @@ static void output_task_metrics(void *ctx, int cpu, void *data, __u32 size) report_task_metrics(task_data); - if (flags & TASK_PROBE_THREAD_IO) { - output_task_metrics_io(task_data); - } else if (flags & TASK_PROBE_THREAD_CPU) { + if (flags & TASK_PROBE_THREAD_CPU) { output_task_metrics_cpu(task_data); } (void)fflush(stdout); @@ -178,25 +121,6 @@ static int load_task_create_pb(struct bpf_prog_s* prog, int fd) return 0; } -static int load_task_io_prog(struct bpf_prog_s *prog, char is_load) -{ - int ret = 0; - - __LOAD_PROBE(thread_io, err, is_load); - if (is_load) { - prog->skels[prog->num].skel = thread_io_skel; - prog->skels[prog->num].fn = (skel_destroy_fn)thread_io_bpf__destroy; - prog->num++; - - ret = load_task_create_pb(prog, GET_MAP_FD(thread_io, g_task_output)); - } - - return ret; -err: - UNLOAD(thread_io); - return -1; -} - static int load_task_cpu_prog(struct bpf_prog_s *prog, char is_load) { int ret = 0; @@ -219,7 +143,7 @@ err: struct bpf_prog_s* load_task_bpf_prog(struct probe_params *args) { struct bpf_prog_s *prog; - char is_load_io, is_load_cpu; + char is_load_cpu; g_args = args; prog = alloc_bpf_prog(); @@ -227,13 +151,8 @@ struct bpf_prog_s* load_task_bpf_prog(struct probe_params *args) return NULL; } - is_load_io = is_load_probe(args, TASK_PROBE_THREAD_IO); is_load_cpu = is_load_probe(args, TASK_PROBE_THREAD_CPU); - if (load_task_io_prog(prog, is_load_io)) { - goto err; - } - if (load_task_cpu_prog(prog, is_load_cpu)) { goto err; }