|
@@ -631,6 +631,29 @@ void pfault_fini(void)
|
|
|
static DEFINE_SPINLOCK(pfault_lock);
|
|
|
static LIST_HEAD(pfault_list);
|
|
|
|
|
|
+#define PF_COMPLETE 0x0080
|
|
|
+
|
|
|
+/*
|
|
|
+ * The mechanism of our pfault code: if Linux is running as a guest, runs a user
|
|
|
+ * space process and the user space process accesses a page that the host has
|
|
|
+ * paged out we get a pfault interrupt.
|
|
|
+ *
|
|
|
+ * This allows us, within the guest, to schedule a different process. Without
|
|
|
+ * this mechanism the host would have to suspend the whole virtual cpu until
|
|
|
+ * the page has been paged in.
|
|
|
+ *
|
|
|
+ * So when we get such an interrupt then we set the state of the current task
|
|
|
+ * to uninterruptible and also set the need_resched flag. Both happen within
|
|
|
+ * interrupt context(!). If we later on want to return to user space we
|
|
|
+ * recognize the need_resched flag and then call schedule(). It's not very
|
|
|
+ * obvious how this works...
|
|
|
+ *
|
|
|
+ * Of course we have a lot of additional fun with the completion interrupt (->
|
|
|
+ * host signals that a page of a process has been paged in and the process can
|
|
|
+ * continue to run). This interrupt can arrive on any cpu and, since we have
|
|
|
+ * virtual cpus, actually appear before the interrupt that signals that a page
|
|
|
+ * is missing.
|
|
|
+ */
|
|
|
static void pfault_interrupt(struct ext_code ext_code,
|
|
|
unsigned int param32, unsigned long param64)
|
|
|
{
|
|
@@ -639,10 +662,9 @@ static void pfault_interrupt(struct ext_code ext_code,
|
|
|
pid_t pid;
|
|
|
|
|
|
/*
|
|
|
- * Get the external interruption subcode & pfault
|
|
|
- * initial/completion signal bit. VM stores this
|
|
|
- * in the 'cpu address' field associated with the
|
|
|
- * external interrupt.
|
|
|
+ * Get the external interruption subcode & pfault initial/completion
|
|
|
+ * signal bit. VM stores this in the 'cpu address' field associated
|
|
|
+ * with the external interrupt.
|
|
|
*/
|
|
|
subcode = ext_code.subcode;
|
|
|
if ((subcode & 0xff00) != __SUBCODE_MASK)
|
|
@@ -658,7 +680,7 @@ static void pfault_interrupt(struct ext_code ext_code,
|
|
|
if (!tsk)
|
|
|
return;
|
|
|
spin_lock(&pfault_lock);
|
|
|
- if (subcode & 0x0080) {
|
|
|
+ if (subcode & PF_COMPLETE) {
|
|
|
/* signal bit is set -> a page has been swapped in by VM */
|
|
|
if (tsk->thread.pfault_wait == 1) {
|
|
|
/* Initial interrupt was faster than the completion
|
|
@@ -687,8 +709,7 @@ static void pfault_interrupt(struct ext_code ext_code,
|
|
|
goto out;
|
|
|
if (tsk->thread.pfault_wait == 1) {
|
|
|
/* Already on the list with a reference: put to sleep */
|
|
|
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
|
|
- set_tsk_need_resched(tsk);
|
|
|
+ goto block;
|
|
|
} else if (tsk->thread.pfault_wait == -1) {
|
|
|
/* Completion interrupt was faster than the initial
|
|
|
* interrupt (pfault_wait == -1). Set pfault_wait
|
|
@@ -703,7 +724,11 @@ static void pfault_interrupt(struct ext_code ext_code,
|
|
|
get_task_struct(tsk);
|
|
|
tsk->thread.pfault_wait = 1;
|
|
|
list_add(&tsk->thread.list, &pfault_list);
|
|
|
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
|
|
+block:
|
|
|
+ /* Since this must be a userspace fault, there
|
|
|
+ * is no kernel task state to trample. Rely on the
|
|
|
+ * return to userspace schedule() to block. */
|
|
|
+ __set_current_state(TASK_UNINTERRUPTIBLE);
|
|
|
set_tsk_need_resched(tsk);
|
|
|
}
|
|
|
}
|