
KVM: MMU: document clear_spte_count

Document it in Documentation/virtual/kvm/mmu.txt

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Xiao Guangrong, 12 years ago
commit accaefe07d

3 changed files with 23 additions and 3 deletions
  1. Documentation/virtual/kvm/mmu.txt (+5 -0)
  2. arch/x86/include/asm/kvm_host.h (+4 -0)
  3. arch/x86/kvm/mmu.c (+14 -3)

+ 5 - 0
Documentation/virtual/kvm/mmu.txt

@@ -210,6 +210,11 @@ Shadow pages contain the following information:
     A bitmap indicating which sptes in spt point (directly or indirectly) at
     pages that may be unsynchronized.  Used to quickly locate all unsynchronized
     pages reachable from a given page.
+  clear_spte_count:
+    Only present on 32-bit hosts, where a 64-bit spte cannot be written
+    atomically.  The reader uses this while running outside of the MMU lock
+    to detect in-progress updates, and retries the read until the writer
+    has finished the write.
 
 
 Reverse map
 ===========
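
For reference, the reason a 64-bit spte cannot be written atomically on a
32-bit host is that it is manipulated as two 32-bit halves.  A sketch of the
split layout, modeled on the union split_spte that arch/x86/kvm/mmu.c defines
under CONFIG_X86_32 (the field comments here are illustrative):

	union split_spte {
		struct {
			u32 spte_low;	/* low half, carries the present bit */
			u32 spte_high;	/* high half, upper part of the pfn */
		};
		u64 spte;
	};

Because the two halves are stored separately, a reader running without the
MMU lock can observe one half old and one half new; clear_spte_count exists
to detect exactly that window.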

+ 4 - 0
arch/x86/include/asm/kvm_host.h

@@ -226,6 +226,10 @@ struct kvm_mmu_page {
 	DECLARE_BITMAP(unsync_child_bitmap, 512);
 
 #ifdef CONFIG_X86_32
+	/*
+	 * Used out of the mmu-lock to avoid reading spte values while an
+	 * update is in progress; see the comments in __get_spte_lockless().
+	 */
 	int clear_spte_count;
 #endif
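
For reference, a sketch of the writer side this field pairs with: a
present->non-present update clears the low half (which carries the present
bit) first, then writes the high half, then bumps the counter.  This is
modeled on __update_clear_spte_fast() and count_spte_clear() in
arch/x86/kvm/mmu.c; page_header() and is_shadow_present_pte() are existing
helpers there, and the exact bodies may differ from the tree:

	static void count_spte_clear(u64 *sptep, u64 spte)
	{
		struct kvm_mmu_page *sp = page_header(__pa(sptep));

		if (is_shadow_present_pte(spte))
			return;

		/* ensure the cleared spte is visible before the count moves */
		smp_wmb();
		sp->clear_spte_count++;
	}

	static void __update_clear_spte_fast(u64 *sptep, u64 spte)
	{
		union split_spte *ssptep = (union split_spte *)sptep;
		union split_spte sspte;

		sspte.spte = spte;

		/*
		 * Clear the present bit (low half) first, so a lockless
		 * reader never treats the stale high half as part of a
		 * present spte.
		 */
		ssptep->spte_low = sspte.spte_low;
		smp_wmb();

		ssptep->spte_high = sspte.spte_high;
		count_spte_clear(sptep, spte);
	}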
 
 

+ 14 - 3
arch/x86/kvm/mmu.c

@@ -466,9 +466,20 @@ static u64 __update_clear_spte_slow(u64 *sptep, u64 spte)
 /*
  * The idea of using this lightweight way to get the spte on x86_32 guests
  * is from gup_get_pte(arch/x86/mm/gup.c).
- * The difference is we can not catch the spte tlb flush if we leave
- * guest mode, so we emulate it by increase clear_spte_count when spte
- * is cleared.
+ *
+ * An spte tlb flush may be pending, because kvm_set_pte_rmapp
+ * coalesces them and we are running out of the MMU lock.  Therefore
+ * we need to protect against in-progress updates of the spte.
+ *
+ * Reading the spte while an update is in progress may get the old value
+ * for the high part of the spte.  The race is fine for a present->non-present
+ * change (because the high part of the spte is ignored for non-present spte),
+ * but for a present->present change we must reread the spte.
+ *
+ * All such changes are done in two steps (present->non-present and
+ * non-present->present), hence it is enough to count the number of
+ * present->non-present updates: if it changed while reading the spte,
+ * we might have hit the race.  This is done using clear_spte_count.
  */
 static u64 __get_spte_lockless(u64 *sptep)
 {
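 	/*
 	 * The hunk is truncated here; the following body is a sketch of
 	 * the retry loop the comment above describes, not the verbatim
 	 * patch.  union split_spte and page_header() are assumed from
 	 * the surrounding file.
 	 */
 	struct kvm_mmu_page *sp = page_header(__pa(sptep));
 	union split_spte spte, *orig = (union split_spte *)sptep;
 	int count;
 
 retry:
 	count = sp->clear_spte_count;
 	smp_rmb();		/* read the counter before either half */
 
 	spte.spte_low = orig->spte_low;
 	smp_rmb();		/* read the low half before the high half */
 
 	spte.spte_high = orig->spte_high;
 	smp_rmb();		/* read the high half before re-checking */
 
 	/*
 	 * If a present->non-present update (which bumps the counter and
 	 * rewrites the low half) ran in the meantime, the two halves we
 	 * read may be mismatched, so start over.
 	 */
 	if (unlikely(spte.spte_low != orig->spte_low ||
 		     count != sp->clear_spte_count))
 		goto retry;
 
 	return spte.spte;
 }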
 {