|
@@ -1722,6 +1722,60 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
|
|
|
+{
|
|
|
+ struct kvm_vcpu_arch *vcpu = &v->arch;
|
|
|
+ struct pvclock_vcpu_time_info guest_hv_clock;
|
|
|
+
|
|
|
+ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
+ &guest_hv_clock, sizeof(guest_hv_clock))))
|
|
|
+ return;
|
|
|
+
|
|
|
+ /* This VCPU is paused, but it's legal for a guest to read another
|
|
|
+ * VCPU's kvmclock, so we really have to follow the specification where
|
|
|
+ * it says that version is odd if data is being modified, and even after
|
|
|
+ * it is consistent.
|
|
|
+ *
|
|
|
+ * Version field updates must be kept separate. This is because
|
|
|
+ * kvm_write_guest_cached might use a "rep movs" instruction, and
|
|
|
+ * writes within a string instruction are weakly ordered. So there
|
|
|
+ * are three writes overall.
|
|
|
+ *
|
|
|
+ * As a small optimization, only write the version field in the first
|
|
|
+ * and third write. The vcpu->pv_time cache is still valid, because the
|
|
|
+ * version field is the first in the struct.
|
|
|
+ */
|
|
|
+ BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
|
|
|
+
|
|
|
+ vcpu->hv_clock.version = guest_hv_clock.version + 1;
|
|
|
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
+ &vcpu->hv_clock,
|
|
|
+ sizeof(vcpu->hv_clock.version));
|
|
|
+
|
|
|
+ smp_wmb();
|
|
|
+
|
|
|
+ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
|
|
|
+ vcpu->hv_clock.flags |= (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
|
|
|
+
|
|
|
+ if (vcpu->pvclock_set_guest_stopped_request) {
|
|
|
+ vcpu->hv_clock.flags |= PVCLOCK_GUEST_STOPPED;
|
|
|
+ vcpu->pvclock_set_guest_stopped_request = false;
|
|
|
+ }
|
|
|
+
|
|
|
+ trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
|
|
|
+
|
|
|
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
+ &vcpu->hv_clock,
|
|
|
+ sizeof(vcpu->hv_clock));
|
|
|
+
|
|
|
+ smp_wmb();
|
|
|
+
|
|
|
+ vcpu->hv_clock.version++;
|
|
|
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
+ &vcpu->hv_clock,
|
|
|
+ sizeof(vcpu->hv_clock.version));
|
|
|
+}
|
|
|
+
|
|
|
static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|
|
{
|
|
|
unsigned long flags, tgt_tsc_khz;
|
|
@@ -1729,7 +1783,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|
|
struct kvm_arch *ka = &v->kvm->arch;
|
|
|
s64 kernel_ns;
|
|
|
u64 tsc_timestamp, host_tsc;
|
|
|
- struct pvclock_vcpu_time_info guest_hv_clock;
|
|
|
u8 pvclock_flags;
|
|
|
bool use_master_clock;
|
|
|
|
|
@@ -1783,8 +1836,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|
|
|
|
|
local_irq_restore(flags);
|
|
|
|
|
|
- if (!vcpu->pv_time_enabled)
|
|
|
- return 0;
|
|
|
+ /* With all the info we got, fill in the values */
|
|
|
|
|
|
if (kvm_has_tsc_control)
|
|
|
tgt_tsc_khz = kvm_scale_tsc(v, tgt_tsc_khz);
|
|
@@ -1796,64 +1848,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
|
|
|
vcpu->hw_tsc_khz = tgt_tsc_khz;
|
|
|
}
|
|
|
|
|
|
- /* With all the info we got, fill in the values */
|
|
|
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
|
|
|
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
|
|
|
vcpu->last_guest_tsc = tsc_timestamp;
|
|
|
|
|
|
- if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
- &guest_hv_clock, sizeof(guest_hv_clock))))
|
|
|
- return 0;
|
|
|
-
|
|
|
- /* This VCPU is paused, but it's legal for a guest to read another
|
|
|
- * VCPU's kvmclock, so we really have to follow the specification where
|
|
|
- * it says that version is odd if data is being modified, and even after
|
|
|
- * it is consistent.
|
|
|
- *
|
|
|
- * Version field updates must be kept separate. This is because
|
|
|
- * kvm_write_guest_cached might use a "rep movs" instruction, and
|
|
|
- * writes within a string instruction are weakly ordered. So there
|
|
|
- * are three writes overall.
|
|
|
- *
|
|
|
- * As a small optimization, only write the version field in the first
|
|
|
- * and third write. The vcpu->pv_time cache is still valid, because the
|
|
|
- * version field is the first in the struct.
|
|
|
- */
|
|
|
- BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
|
|
|
-
|
|
|
- vcpu->hv_clock.version = guest_hv_clock.version + 1;
|
|
|
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
- &vcpu->hv_clock,
|
|
|
- sizeof(vcpu->hv_clock.version));
|
|
|
-
|
|
|
- smp_wmb();
|
|
|
-
|
|
|
- /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
|
|
|
- pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
|
|
|
-
|
|
|
- if (vcpu->pvclock_set_guest_stopped_request) {
|
|
|
- pvclock_flags |= PVCLOCK_GUEST_STOPPED;
|
|
|
- vcpu->pvclock_set_guest_stopped_request = false;
|
|
|
- }
|
|
|
-
|
|
|
/* If the host uses TSC clocksource, then it is stable */
|
|
|
+ pvclock_flags = 0;
|
|
|
if (use_master_clock)
|
|
|
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
|
|
|
|
|
|
vcpu->hv_clock.flags = pvclock_flags;
|
|
|
|
|
|
- trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
|
|
|
-
|
|
|
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
- &vcpu->hv_clock,
|
|
|
- sizeof(vcpu->hv_clock));
|
|
|
-
|
|
|
- smp_wmb();
|
|
|
+ if (!vcpu->pv_time_enabled)
|
|
|
+ return 0;
|
|
|
|
|
|
- vcpu->hv_clock.version++;
|
|
|
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
|
|
|
- &vcpu->hv_clock,
|
|
|
- sizeof(vcpu->hv_clock.version));
|
|
|
+ kvm_setup_pvclock_page(v);
|
|
|
return 0;
|
|
|
}
|
|
|
|