@@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 }
 
 /*
- * Number of OOM killer invocations (including memcg OOM killer).
- * Primarily used by PM freezer to check for potential races with
- * OOM killed frozen task.
+ * Number of OOM victims in flight
  */
-static atomic_t oom_kills = ATOMIC_INIT(0);
+static atomic_t oom_victims = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
 
-int oom_kills_count(void)
-{
-	return atomic_read(&oom_kills);
-}
-
-void note_oom_kill(void)
-{
-	atomic_inc(&oom_kills);
-}
+bool oom_killer_disabled __read_mostly;
+static DECLARE_RWSEM(oom_sem);
 
 /**
  * mark_tsk_oom_victim - marks the given taks as OOM victim.
  * @tsk: task to mark
+ *
+ * Has to be called with oom_sem taken for read and never after
+ * the OOM killer has been disabled.
  */
 void mark_tsk_oom_victim(struct task_struct *tsk)
 {
-	set_tsk_thread_flag(tsk, TIF_MEMDIE);
-
+	WARN_ON(oom_killer_disabled);
+	/* OOM killer might race with memcg OOM */
+	if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
+		return;
 	/*
 	 * Make sure that the task is woken up from uninterruptible sleep
 	 * if it is frozen because OOM killer wouldn't be able to free
@@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk)
 	 * that TIF_MEMDIE tasks should be ignored.
 	 */
 	__thaw_task(tsk);
+	atomic_inc(&oom_victims);
 }
 
 /**
  * unmark_oom_victim - unmarks the current task as OOM victim.
+ *
+ * Wakes up all waiters in oom_killer_disable()
  */
 void unmark_oom_victim(void)
 {
-	clear_thread_flag(TIF_MEMDIE);
+	if (!test_and_clear_thread_flag(TIF_MEMDIE))
+		return;
+
+	down_read(&oom_sem);
+	/*
+	 * There is no need to signal the last oom_victim if there
+	 * is nobody who cares.
+	 */
+	if (!atomic_dec_return(&oom_victims) && oom_killer_disabled)
+		wake_up_all(&oom_victims_wait);
+	up_read(&oom_sem);
+}
+
+/**
+ * oom_killer_disable - disable OOM killer
+ *
+ * Forces all page allocations to fail rather than trigger the OOM killer.
+ * Will block and wait until all OOM victims are killed.
+ *
+ * The function cannot be called when there are runnable user tasks because
+ * userspace would see unexpected allocation failures as a result. Any new
+ * use of this function should be discussed with the MM people first.
+ *
+ * Returns true if successful and false if the OOM killer cannot be
+ * disabled.
+ */
+bool oom_killer_disable(void)
+{
+	/*
+	 * Make sure not to race with an ongoing OOM kill
+	 * and that the current task is not an OOM victim.
+	 */
+	down_write(&oom_sem);
+	if (test_thread_flag(TIF_MEMDIE)) {
+		up_write(&oom_sem);
+		return false;
+	}
+
+	oom_killer_disabled = true;
+	up_write(&oom_sem);
+
+	wait_event(oom_victims_wait, !atomic_read(&oom_victims));
+
+	return true;
+}
+
+/**
+ * oom_killer_enable - enable OOM killer
+ */
+void oom_killer_enable(void)
+{
+	down_write(&oom_sem);
+	oom_killer_disabled = false;
+	up_write(&oom_sem);
 }
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
@@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
 }
 
 /**
- * out_of_memory - kill the "best" process when we run out of memory
+ * __out_of_memory - kill the "best" process when we run out of memory
  * @zonelist: zonelist pointer
  * @gfp_mask: memory allocation flags
  * @order: amount of memory being requested as a power of 2
@@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask)
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
 		int order, nodemask_t *nodemask, bool force_kill)
 {
 	const nodemask_t *mpol_mask;
@@ -718,6 +771,32 @@ out:
 		schedule_timeout_killable(1);
 }
 
+/**
+ * out_of_memory - tries to invoke the OOM killer.
+ * @zonelist: zonelist pointer
+ * @gfp_mask: memory allocation flags
+ * @order: amount of memory being requested as a power of 2
+ * @nodemask: nodemask passed to page allocator
+ * @force_kill: true if a task must be killed, even if others are exiting
+ *
+ * Invokes __out_of_memory() unless the OOM killer has been disabled by
+ * oom_killer_disable(); in that case it returns false. Otherwise returns true.
+ */
+bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask, bool force_kill)
+{
+	bool ret = false;
+
+	down_read(&oom_sem);
+	if (!oom_killer_disabled) {
+		__out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill);
+		ret = true;
+	}
+	up_read(&oom_sem);
+
+	return ret;
+}
+
 /*
  * The pagefault handler calls here because it is out of memory, so kill a
  * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a
@@ -727,12 +806,25 @@ void pagefault_out_of_memory(void)
 {
 	struct zonelist *zonelist;
 
+	down_read(&oom_sem);
 	if (mem_cgroup_oom_synchronize(true))
-		return;
+		goto unlock;
 
 	zonelist = node_zonelist(first_memory_node, GFP_KERNEL);
 	if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) {
-		out_of_memory(NULL, 0, 0, NULL, false);
+		if (!oom_killer_disabled)
+			__out_of_memory(NULL, 0, 0, NULL, false);
+		else
+			/*
+			 * There shouldn't be any user tasks runnable while the
+			 * OOM killer is disabled, so the current task must be
+			 * a racing OOM victim which oom_killer_disable()
+			 * is waiting for.
+			 */
+			WARN_ON(test_thread_flag(TIF_MEMDIE));
+
 		oom_zonelist_unlock(zonelist, GFP_KERNEL);
 	}
+unlock:
+	up_read(&oom_sem);
 }
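
The disable/wait handshake is spread over several hunks, so here is a small userspace model of it for illustration only; it is not part of the patch and uses no kernel API. A pthread rwlock stands in for oom_sem, a mutex/condvar pair stands in for oom_victims_wait, and a plain counter stands in for the oom_victims atomic. The TIF_MEMDIE self-check in oom_killer_disable() and the oom_killer_enable() path are omitted for brevity. Build with: cc -pthread model.c

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t oom_sem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t victims_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t oom_victims_wait = PTHREAD_COND_INITIALIZER;
static int oom_victims;
static bool oom_killer_disabled;

/* ~ mark_tsk_oom_victim(): called with oom_sem held for read */
static void mark_victim(void)
{
	pthread_mutex_lock(&victims_lock);
	oom_victims++;
	pthread_mutex_unlock(&victims_lock);
}

/* ~ unmark_oom_victim(): the last victim wakes the waiter, if any */
static void unmark_victim(void)
{
	pthread_rwlock_rdlock(&oom_sem);
	pthread_mutex_lock(&victims_lock);
	if (--oom_victims == 0 && oom_killer_disabled)
		pthread_cond_broadcast(&oom_victims_wait);
	pthread_mutex_unlock(&victims_lock);
	pthread_rwlock_unlock(&oom_sem);
}

/* ~ oom_killer_disable(): flip the flag, then wait out victims in flight */
static void disable_oom_killer(void)
{
	pthread_rwlock_wrlock(&oom_sem);	/* exclude a running OOM kill */
	oom_killer_disabled = true;
	pthread_rwlock_unlock(&oom_sem);

	pthread_mutex_lock(&victims_lock);
	while (oom_victims > 0)
		pthread_cond_wait(&oom_victims_wait, &victims_lock);
	pthread_mutex_unlock(&victims_lock);
}

static void *victim_thread(void *arg)
{
	(void)arg;
	usleep(100 * 1000);	/* the victim takes a while to exit */
	unmark_victim();
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* An OOM kill runs under the read side of oom_sem. */
	pthread_rwlock_rdlock(&oom_sem);
	mark_victim();
	pthread_rwlock_unlock(&oom_sem);

	pthread_create(&t, NULL, victim_thread, NULL);
	disable_oom_killer();	/* blocks until the victim above is gone */
	puts("OOM killer disabled, no victims in flight");
	pthread_join(t, NULL);
	return 0;
}

The point the model makes is the same one the patch relies on: the flag flip happens under the write side of the lock, every OOM kill and every victim exit happens under the read side, so once oom_killer_disable() returns there can be neither a new victim nor one still in flight.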