|
@@ -0,0 +1,281 @@
|
|
|
+// SPDX-License-Identifier: GPL-2.0
|
|
|
+
|
|
|
+#include <linux/version.h>
|
|
|
+#include <linux/ptrace.h>
|
|
|
+#include <uapi/linux/bpf.h>
|
|
|
+#include "bpf_helpers.h"
|
|
|
+
|
|
|
+/*
|
|
|
+ * The CPU number, cstate number and pstate number are based
|
|
|
+ * on 96boards Hikey with octa CA53 CPUs.
|
|
|
+ *
|
|
|
+ * Every CPU have three idle states for cstate:
|
|
|
+ * WFI, CPU_OFF, CLUSTER_OFF
|
|
|
+ *
|
|
|
+ * Every CPU have 5 operating points:
|
|
|
+ * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
|
|
|
+ *
|
|
|
+ * This code is based on these assumption and other platforms
|
|
|
+ * need to adjust these definitions.
|
|
|
+ */
|
|
|
+#define MAX_CPU 8
|
|
|
+#define MAX_PSTATE_ENTRIES 5
|
|
|
+#define MAX_CSTATE_ENTRIES 3
|
|
|
+
|
|
|
+static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
|
|
|
+
|
|
|
+/*
|
|
|
+ * my_map structure is used to record cstate and pstate index and
|
|
|
+ * timestamp (Idx, Ts), when new event incoming we need to update
|
|
|
+ * combination for new state index and timestamp (Idx`, Ts`).
|
|
|
+ *
|
|
|
+ * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
|
|
|
+ * interval for the previous state: Duration(Idx) = Ts` - Ts.
|
|
|
+ *
|
|
|
+ * Every CPU has one below array for recording state index and
|
|
|
+ * timestamp, and record for cstate and pstate saperately:
|
|
|
+ *
|
|
|
+ * +--------------------------+
|
|
|
+ * | cstate timestamp |
|
|
|
+ * +--------------------------+
|
|
|
+ * | cstate index |
|
|
|
+ * +--------------------------+
|
|
|
+ * | pstate timestamp |
|
|
|
+ * +--------------------------+
|
|
|
+ * | pstate index |
|
|
|
+ * +--------------------------+
|
|
|
+ */
|
|
|
+#define MAP_OFF_CSTATE_TIME 0
|
|
|
+#define MAP_OFF_CSTATE_IDX 1
|
|
|
+#define MAP_OFF_PSTATE_TIME 2
|
|
|
+#define MAP_OFF_PSTATE_IDX 3
|
|
|
+#define MAP_OFF_NUM 4
|
|
|
+
|
|
|
+struct bpf_map_def SEC("maps") my_map = {
|
|
|
+ .type = BPF_MAP_TYPE_ARRAY,
|
|
|
+ .key_size = sizeof(u32),
|
|
|
+ .value_size = sizeof(u64),
|
|
|
+ .max_entries = MAX_CPU * MAP_OFF_NUM,
|
|
|
+};
|
|
|
+
|
|
|
+/* cstate_duration records duration time for every idle state per CPU */
|
|
|
+struct bpf_map_def SEC("maps") cstate_duration = {
|
|
|
+ .type = BPF_MAP_TYPE_ARRAY,
|
|
|
+ .key_size = sizeof(u32),
|
|
|
+ .value_size = sizeof(u64),
|
|
|
+ .max_entries = MAX_CPU * MAX_CSTATE_ENTRIES,
|
|
|
+};
|
|
|
+
|
|
|
+/* pstate_duration records duration time for every operating point per CPU */
|
|
|
+struct bpf_map_def SEC("maps") pstate_duration = {
|
|
|
+ .type = BPF_MAP_TYPE_ARRAY,
|
|
|
+ .key_size = sizeof(u32),
|
|
|
+ .value_size = sizeof(u64),
|
|
|
+ .max_entries = MAX_CPU * MAX_PSTATE_ENTRIES,
|
|
|
+};
|
|
|
+
|
|
|
+/*
|
|
|
+ * The trace events for cpu_idle and cpu_frequency are taken from:
|
|
|
+ * /sys/kernel/debug/tracing/events/power/cpu_idle/format
|
|
|
+ * /sys/kernel/debug/tracing/events/power/cpu_frequency/format
|
|
|
+ *
|
|
|
+ * These two events have same format, so define one common structure.
|
|
|
+ */
|
|
|
+struct cpu_args {
|
|
|
+ u64 pad;
|
|
|
+ u32 state;
|
|
|
+ u32 cpu_id;
|
|
|
+};
|
|
|
+
|
|
|
+/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
|
|
|
+static u32 find_cpu_pstate_idx(u32 frequency)
|
|
|
+{
|
|
|
+ u32 i;
|
|
|
+
|
|
|
+ for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
|
|
|
+ if (frequency == cpu_opps[i])
|
|
|
+ return i;
|
|
|
+ }
|
|
|
+
|
|
|
+ return i;
|
|
|
+}
|
|
|
+
|
|
|
+SEC("tracepoint/power/cpu_idle")
|
|
|
+int bpf_prog1(struct cpu_args *ctx)
|
|
|
+{
|
|
|
+ u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
|
|
|
+ u32 key, cpu, pstate_idx;
|
|
|
+ u64 *val;
|
|
|
+
|
|
|
+ if (ctx->cpu_id > MAX_CPU)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ cpu = ctx->cpu_id;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
|
|
|
+ cts = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!cts)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
|
|
|
+ cstate = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!cstate)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
|
|
|
+ pts = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!pts)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
|
|
|
+ pstate = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!pstate)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ prev_state = *cstate;
|
|
|
+ *cstate = ctx->state;
|
|
|
+
|
|
|
+ if (!*cts) {
|
|
|
+ *cts = bpf_ktime_get_ns();
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ cur_ts = bpf_ktime_get_ns();
|
|
|
+ delta = cur_ts - *cts;
|
|
|
+ *cts = cur_ts;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * When state doesn't equal to (u32)-1, the cpu will enter
|
|
|
+ * one idle state; for this case we need to record interval
|
|
|
+ * for the pstate.
|
|
|
+ *
|
|
|
+ * OPP2
|
|
|
+ * +---------------------+
|
|
|
+ * OPP1 | |
|
|
|
+ * ---------+ |
|
|
|
+ * | Idle state
|
|
|
+ * +---------------
|
|
|
+ *
|
|
|
+ * |<- pstate duration ->|
|
|
|
+ * ^ ^
|
|
|
+ * pts cur_ts
|
|
|
+ */
|
|
|
+ if (ctx->state != (u32)-1) {
|
|
|
+
|
|
|
+ /* record pstate after have first cpu_frequency event */
|
|
|
+ if (!*pts)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ delta = cur_ts - *pts;
|
|
|
+
|
|
|
+ pstate_idx = find_cpu_pstate_idx(*pstate);
|
|
|
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
|
|
|
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
|
|
|
+ if (val)
|
|
|
+ __sync_fetch_and_add((long *)val, delta);
|
|
|
+
|
|
|
+ /*
|
|
|
+ * When state equal to (u32)-1, the cpu just exits from one
|
|
|
+ * specific idle state; for this case we need to record
|
|
|
+ * interval for the pstate.
|
|
|
+ *
|
|
|
+ * OPP2
|
|
|
+ * -----------+
|
|
|
+ * | OPP1
|
|
|
+ * | +-----------
|
|
|
+ * | Idle state |
|
|
|
+ * +---------------------+
|
|
|
+ *
|
|
|
+ * |<- cstate duration ->|
|
|
|
+ * ^ ^
|
|
|
+ * cts cur_ts
|
|
|
+ */
|
|
|
+ } else {
|
|
|
+
|
|
|
+ key = cpu * MAX_CSTATE_ENTRIES + prev_state;
|
|
|
+ val = bpf_map_lookup_elem(&cstate_duration, &key);
|
|
|
+ if (val)
|
|
|
+ __sync_fetch_and_add((long *)val, delta);
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Update timestamp for pstate as new start time */
|
|
|
+ if (*pts)
|
|
|
+ *pts = cur_ts;
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+SEC("tracepoint/power/cpu_frequency")
|
|
|
+int bpf_prog2(struct cpu_args *ctx)
|
|
|
+{
|
|
|
+ u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
|
|
|
+ u32 key, cpu, pstate_idx;
|
|
|
+ u64 *val;
|
|
|
+
|
|
|
+ cpu = ctx->cpu_id;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
|
|
|
+ pts = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!pts)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
|
|
|
+ pstate = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!pstate)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
|
|
|
+ cstate = bpf_map_lookup_elem(&my_map, &key);
|
|
|
+ if (!cstate)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ prev_state = *pstate;
|
|
|
+ *pstate = ctx->state;
|
|
|
+
|
|
|
+ if (!*pts) {
|
|
|
+ *pts = bpf_ktime_get_ns();
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ cur_ts = bpf_ktime_get_ns();
|
|
|
+ delta = cur_ts - *pts;
|
|
|
+ *pts = cur_ts;
|
|
|
+
|
|
|
+ /* When CPU is in idle, bail out to skip pstate statistics */
|
|
|
+ if (*cstate != (u32)(-1))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ /*
|
|
|
+ * The cpu changes to another different OPP (in below diagram
|
|
|
+ * change frequency from OPP3 to OPP1), need recording interval
|
|
|
+ * for previous frequency OPP3 and update timestamp as start
|
|
|
+ * time for new frequency OPP1.
|
|
|
+ *
|
|
|
+ * OPP3
|
|
|
+ * +---------------------+
|
|
|
+ * OPP2 | |
|
|
|
+ * ---------+ |
|
|
|
+ * | OPP1
|
|
|
+ * +---------------
|
|
|
+ *
|
|
|
+ * |<- pstate duration ->|
|
|
|
+ * ^ ^
|
|
|
+ * pts cur_ts
|
|
|
+ */
|
|
|
+ pstate_idx = find_cpu_pstate_idx(*pstate);
|
|
|
+ if (pstate_idx >= MAX_PSTATE_ENTRIES)
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
|
|
|
+ val = bpf_map_lookup_elem(&pstate_duration, &key);
|
|
|
+ if (val)
|
|
|
+ __sync_fetch_and_add((long *)val, delta);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+char _license[] SEC("license") = "GPL";
|
|
|
+u32 _version SEC("version") = LINUX_VERSION_CODE;
|