|
@@ -61,8 +61,8 @@ unsigned int has_epb;
|
|
unsigned int units = 1000000; /* MHz etc */
|
|
unsigned int units = 1000000; /* MHz etc */
|
|
unsigned int genuine_intel;
|
|
unsigned int genuine_intel;
|
|
unsigned int has_invariant_tsc;
|
|
unsigned int has_invariant_tsc;
|
|
-unsigned int do_nehalem_platform_info;
|
|
|
|
-unsigned int do_nehalem_turbo_ratio_limit;
|
|
|
|
|
|
+unsigned int do_nhm_platform_info;
|
|
|
|
+unsigned int do_nhm_turbo_ratio_limit;
|
|
unsigned int do_ivt_turbo_ratio_limit;
|
|
unsigned int do_ivt_turbo_ratio_limit;
|
|
unsigned int extra_msr_offset32;
|
|
unsigned int extra_msr_offset32;
|
|
unsigned int extra_msr_offset64;
|
|
unsigned int extra_msr_offset64;
|
|
@@ -284,7 +284,7 @@ void print_header(void)
|
|
outp += sprintf(outp, " CPU");
|
|
outp += sprintf(outp, " CPU");
|
|
if (has_aperf)
|
|
if (has_aperf)
|
|
outp += sprintf(outp, " Avg_MHz");
|
|
outp += sprintf(outp, " Avg_MHz");
|
|
- if (do_nhm_cstates)
|
|
|
|
|
|
+ if (has_aperf)
|
|
outp += sprintf(outp, " %%Busy");
|
|
outp += sprintf(outp, " %%Busy");
|
|
if (has_aperf)
|
|
if (has_aperf)
|
|
outp += sprintf(outp, " Bzy_MHz");
|
|
outp += sprintf(outp, " Bzy_MHz");
|
|
@@ -340,7 +340,7 @@ void print_header(void)
|
|
outp += sprintf(outp, " PKG_%%");
|
|
outp += sprintf(outp, " PKG_%%");
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
outp += sprintf(outp, " RAM_%%");
|
|
outp += sprintf(outp, " RAM_%%");
|
|
- } else {
|
|
|
|
|
|
+ } else if (do_rapl && rapl_joules) {
|
|
if (do_rapl & RAPL_PKG)
|
|
if (do_rapl & RAPL_PKG)
|
|
outp += sprintf(outp, " Pkg_J");
|
|
outp += sprintf(outp, " Pkg_J");
|
|
if (do_rapl & RAPL_CORES)
|
|
if (do_rapl & RAPL_CORES)
|
|
@@ -460,25 +460,25 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
|
outp += sprintf(outp, "%8d", t->cpu_id);
|
|
outp += sprintf(outp, "%8d", t->cpu_id);
|
|
}
|
|
}
|
|
|
|
|
|
- /* AvgMHz */
|
|
|
|
|
|
+ /* Avg_MHz */
|
|
if (has_aperf)
|
|
if (has_aperf)
|
|
outp += sprintf(outp, "%8.0f",
|
|
outp += sprintf(outp, "%8.0f",
|
|
1.0 / units * t->aperf / interval_float);
|
|
1.0 / units * t->aperf / interval_float);
|
|
|
|
|
|
- /* %c0 */
|
|
|
|
- if (do_nhm_cstates) {
|
|
|
|
|
|
+ /* %Busy */
|
|
|
|
+ if (has_aperf) {
|
|
if (!skip_c0)
|
|
if (!skip_c0)
|
|
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
|
|
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
|
|
else
|
|
else
|
|
outp += sprintf(outp, "********");
|
|
outp += sprintf(outp, "********");
|
|
}
|
|
}
|
|
|
|
|
|
- /* BzyMHz */
|
|
|
|
|
|
+ /* Bzy_MHz */
|
|
if (has_aperf)
|
|
if (has_aperf)
|
|
outp += sprintf(outp, "%8.0f",
|
|
outp += sprintf(outp, "%8.0f",
|
|
1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
|
|
1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
|
|
|
|
|
|
- /* TSC */
|
|
|
|
|
|
+ /* TSC_MHz */
|
|
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
|
|
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
|
|
|
|
|
|
/* SMI */
|
|
/* SMI */
|
|
@@ -564,7 +564,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
|
|
- } else {
|
|
|
|
|
|
+ } else if (do_rapl && rapl_joules) {
|
|
if (do_rapl & RAPL_PKG)
|
|
if (do_rapl & RAPL_PKG)
|
|
outp += sprintf(outp, fmt8,
|
|
outp += sprintf(outp, fmt8,
|
|
p->energy_pkg * rapl_energy_units);
|
|
p->energy_pkg * rapl_energy_units);
|
|
@@ -581,8 +581,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
if (do_rapl & RAPL_DRAM_PERF_STATUS)
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
|
|
outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
|
|
- outp += sprintf(outp, fmt8, interval_float);
|
|
|
|
|
|
|
|
|
|
+ outp += sprintf(outp, fmt8, interval_float);
|
|
}
|
|
}
|
|
done:
|
|
done:
|
|
outp += sprintf(outp, "\n");
|
|
outp += sprintf(outp, "\n");
|
|
@@ -1022,7 +1022,7 @@ void print_verbose_header(void)
|
|
unsigned long long msr;
|
|
unsigned long long msr;
|
|
unsigned int ratio;
|
|
unsigned int ratio;
|
|
|
|
|
|
- if (!do_nehalem_platform_info)
|
|
|
|
|
|
+ if (!do_nhm_platform_info)
|
|
return;
|
|
return;
|
|
|
|
|
|
get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
|
|
get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
|
|
@@ -1135,7 +1135,7 @@ print_nhm_turbo_ratio_limits:
|
|
}
|
|
}
|
|
fprintf(stderr, ")\n");
|
|
fprintf(stderr, ")\n");
|
|
|
|
|
|
- if (!do_nehalem_turbo_ratio_limit)
|
|
|
|
|
|
+ if (!do_nhm_turbo_ratio_limit)
|
|
return;
|
|
return;
|
|
|
|
|
|
get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
|
|
get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
|
|
@@ -1462,8 +1462,7 @@ void check_dev_msr()
|
|
struct stat sb;
|
|
struct stat sb;
|
|
|
|
|
|
if (stat("/dev/cpu/0/msr", &sb))
|
|
if (stat("/dev/cpu/0/msr", &sb))
|
|
- err(-5, "no /dev/cpu/0/msr\n"
|
|
|
|
- "Try \"# modprobe msr\"");
|
|
|
|
|
|
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
|
|
}
|
|
}
|
|
|
|
|
|
void check_permissions()
|
|
void check_permissions()
|
|
@@ -1496,13 +1495,27 @@ void check_permissions()
|
|
/* if all else fails, thell them to be root */
|
|
/* if all else fails, thell them to be root */
|
|
if (do_exit)
|
|
if (do_exit)
|
|
if (getuid() != 0)
|
|
if (getuid() != 0)
|
|
- warnx("Or simply run as root");
|
|
|
|
|
|
+ warnx("... or simply run as root");
|
|
|
|
|
|
if (do_exit)
|
|
if (do_exit)
|
|
exit(-6);
|
|
exit(-6);
|
|
}
|
|
}
|
|
|
|
|
|
-int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
|
|
|
|
|
|
+/*
|
|
|
|
+ * NHM adds support for additional MSRs:
|
|
|
|
+ *
|
|
|
|
+ * MSR_SMI_COUNT 0x00000034
|
|
|
|
+ *
|
|
|
|
+ * MSR_NHM_PLATFORM_INFO 0x000000ce
|
|
|
|
+ * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
|
|
|
|
+ *
|
|
|
|
+ * MSR_PKG_C3_RESIDENCY 0x000003f8
|
|
|
|
+ * MSR_PKG_C6_RESIDENCY 0x000003f9
|
|
|
|
+ * MSR_CORE_C3_RESIDENCY 0x000003fc
|
|
|
|
+ * MSR_CORE_C6_RESIDENCY 0x000003fd
|
|
|
|
+ *
|
|
|
|
+ */
|
|
|
|
+int has_nhm_msrs(unsigned int family, unsigned int model)
|
|
{
|
|
{
|
|
if (!genuine_intel)
|
|
if (!genuine_intel)
|
|
return 0;
|
|
return 0;
|
|
@@ -1529,13 +1542,27 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
|
|
case 0x3D: /* BDW */
|
|
case 0x3D: /* BDW */
|
|
case 0x4F: /* BDX */
|
|
case 0x4F: /* BDX */
|
|
case 0x56: /* BDX-DE */
|
|
case 0x56: /* BDX-DE */
|
|
- return 1;
|
|
|
|
case 0x2E: /* Nehalem-EX Xeon - Beckton */
|
|
case 0x2E: /* Nehalem-EX Xeon - Beckton */
|
|
case 0x2F: /* Westmere-EX Xeon - Eagleton */
|
|
case 0x2F: /* Westmere-EX Xeon - Eagleton */
|
|
|
|
+ return 1;
|
|
default:
|
|
default:
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
+int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
|
|
|
|
+{
|
|
|
|
+ if (!has_nhm_msrs(family, model))
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ switch (model) {
|
|
|
|
+ /* Nehalem compatible, but do not include turbo-ratio limit support */
|
|
|
|
+ case 0x2E: /* Nehalem-EX Xeon - Beckton */
|
|
|
|
+ case 0x2F: /* Westmere-EX Xeon - Eagleton */
|
|
|
|
+ return 0;
|
|
|
|
+ default:
|
|
|
|
+ return 1;
|
|
|
|
+ }
|
|
|
|
+}
|
|
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
|
|
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
|
|
{
|
|
{
|
|
if (!genuine_intel)
|
|
if (!genuine_intel)
|
|
@@ -1994,8 +2021,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/*
|
|
|
|
+ * SNB adds support for additional MSRs:
|
|
|
|
+ *
|
|
|
|
+ * MSR_PKG_C7_RESIDENCY 0x000003fa
|
|
|
|
+ * MSR_CORE_C7_RESIDENCY 0x000003fe
|
|
|
|
+ * MSR_PKG_C2_RESIDENCY 0x0000060d
|
|
|
|
+ */
|
|
|
|
|
|
-int is_snb(unsigned int family, unsigned int model)
|
|
|
|
|
|
+int has_snb_msrs(unsigned int family, unsigned int model)
|
|
{
|
|
{
|
|
if (!genuine_intel)
|
|
if (!genuine_intel)
|
|
return 0;
|
|
return 0;
|
|
@@ -2017,7 +2051,14 @@ int is_snb(unsigned int family, unsigned int model)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-int has_c8_c9_c10(unsigned int family, unsigned int model)
|
|
|
|
|
|
+/*
|
|
|
|
+ * HSW adds support for additional MSRs:
|
|
|
|
+ *
|
|
|
|
+ * MSR_PKG_C8_RESIDENCY 0x00000630
|
|
|
|
+ * MSR_PKG_C9_RESIDENCY 0x00000631
|
|
|
|
+ * MSR_PKG_C10_RESIDENCY 0x00000632
|
|
|
|
+ */
|
|
|
|
+int has_hsw_msrs(unsigned int family, unsigned int model)
|
|
{
|
|
{
|
|
if (!genuine_intel)
|
|
if (!genuine_intel)
|
|
return 0;
|
|
return 0;
|
|
@@ -2069,7 +2110,7 @@ double slm_bclk(void)
|
|
|
|
|
|
double discover_bclk(unsigned int family, unsigned int model)
|
|
double discover_bclk(unsigned int family, unsigned int model)
|
|
{
|
|
{
|
|
- if (is_snb(family, model))
|
|
|
|
|
|
+ if (has_snb_msrs(family, model))
|
|
return 100.00;
|
|
return 100.00;
|
|
else if (is_slm(family, model))
|
|
else if (is_slm(family, model))
|
|
return slm_bclk();
|
|
return slm_bclk();
|
|
@@ -2117,7 +2158,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
|
|
}
|
|
}
|
|
|
|
|
|
/* Temperature Target MSR is Nehalem and newer only */
|
|
/* Temperature Target MSR is Nehalem and newer only */
|
|
- if (!do_nehalem_platform_info)
|
|
|
|
|
|
+ if (!do_nhm_platform_info)
|
|
goto guess;
|
|
goto guess;
|
|
|
|
|
|
if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
|
|
if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
|
|
@@ -2181,18 +2222,15 @@ void check_cpuid()
|
|
ebx = ecx = edx = 0;
|
|
ebx = ecx = edx = 0;
|
|
__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
|
|
__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
|
|
|
|
|
|
- if (max_level < 0x80000007)
|
|
|
|
- errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level);
|
|
|
|
|
|
+ if (max_level >= 0x80000007) {
|
|
|
|
|
|
- /*
|
|
|
|
- * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
|
|
|
|
- * this check is valid for both Intel and AMD
|
|
|
|
- */
|
|
|
|
- __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
|
|
|
|
- has_invariant_tsc = edx & (1 << 8);
|
|
|
|
-
|
|
|
|
- if (!has_invariant_tsc)
|
|
|
|
- errx(1, "No invariant TSC");
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
|
|
|
|
+ * this check is valid for both Intel and AMD
|
|
|
|
+ */
|
|
|
|
+ __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
|
|
|
|
+ has_invariant_tsc = edx & (1 << 8);
|
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
/*
|
|
* APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
|
|
* APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
|
|
@@ -2215,15 +2253,13 @@ void check_cpuid()
|
|
if (!has_aperf)
|
|
if (!has_aperf)
|
|
errx(-1, "No APERF");
|
|
errx(-1, "No APERF");
|
|
|
|
|
|
- do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
|
|
|
|
- do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
|
|
|
|
- do_smi = do_nhm_cstates;
|
|
|
|
- do_snb_cstates = is_snb(family, model);
|
|
|
|
- do_c8_c9_c10 = has_c8_c9_c10(family, model);
|
|
|
|
|
|
+ do_nhm_platform_info = do_nhm_cstates = do_smi = has_nhm_msrs(family, model);
|
|
|
|
+ do_snb_cstates = has_snb_msrs(family, model);
|
|
|
|
+ do_c8_c9_c10 = has_hsw_msrs(family, model);
|
|
do_slm_cstates = is_slm(family, model);
|
|
do_slm_cstates = is_slm(family, model);
|
|
bclk = discover_bclk(family, model);
|
|
bclk = discover_bclk(family, model);
|
|
|
|
|
|
- do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
|
|
|
|
|
|
+ do_nhm_turbo_ratio_limit = has_nhm_turbo_ratio_limit(family, model);
|
|
do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
|
|
do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
|
|
rapl_probe(family, model);
|
|
rapl_probe(family, model);
|
|
perf_limit_reasons_probe(family, model);
|
|
perf_limit_reasons_probe(family, model);
|