|
@@ -127,6 +127,32 @@ static __always_inline unsigned long long rdtsc(void)
|
|
|
return EAX_EDX_VAL(val, low, high);
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * rdtsc_ordered() - read the current TSC in program order
|
|
|
+ *
|
|
|
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
|
|
|
+ * It is ordered like a load from a global in-memory counter. It should
|
|
|
+ * be impossible to observe non-monotonic rdtsc_ordered() behavior
|
|
|
+ * across multiple CPUs as long as the TSC is synced.
|
|
|
+ */
|
|
|
+static __always_inline unsigned long long rdtsc_ordered(void)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * The RDTSC instruction is not ordered relative to memory
|
|
|
+ * access. The Intel SDM and the AMD APM are both vague on this
|
|
|
+ * point, but empirically an RDTSC instruction can be
|
|
|
+ * speculatively executed before prior loads. An RDTSC
|
|
|
+ * immediately after an appropriate barrier appears to be
|
|
|
+ * ordered as a normal load, that is, it provides the same
|
|
|
+ * ordering guarantees as reading from a global memory location
|
|
|
+ * that some other imaginary CPU is updating continuously with a
|
|
|
+ * time stamp. The MFENCE/LFENCE alternative below is that barrier.
|
|
|
+ */
|
|
|
+ alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
|
|
|
+ "lfence", X86_FEATURE_LFENCE_RDTSC);
|
|
|
+ return rdtsc();
|
|
|
+}
|
|
|
+
|
|
|
static inline unsigned long long native_read_pmc(int counter)
|
|
|
{
|
|
|
DECLARE_ARGS(val, low, high);
|