@@ -7,6 +7,7 @@
  *  zoned VM statistics
  *  Copyright (C) 2006 Silicon Graphics, Inc.,
  *		Christoph Lameter <christoph@lameter.com>
+ *  Copyright (C) 2008-2014 Christoph Lameter
  */
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -14,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/cpu.h>
+#include <linux/cpumask.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>
 #include <linux/math64.h>
@@ -419,13 +421,22 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
-static inline void fold_diff(int *diff)
+
+/*
+ * Fold a differential into the global counters.
+ * Returns the number of counters updated.
+ */
+static int fold_diff(int *diff)
 {
 	int i;
+	int changes = 0;
 
 	for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-		if (diff[i])
+		if (diff[i]) {
 			atomic_long_add(diff[i], &vm_stat[i]);
+			changes++;
+		}
+	return changes;
 }
 
 /*
@@ -441,12 +452,15 @@ static inline void fold_diff(int *diff)
  * statistics in the remote zone struct as well as the global cachelines
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
+ *
+ * The function returns the number of global counters updated.
  */
-static void refresh_cpu_vm_stats(void)
+static int refresh_cpu_vm_stats(void)
 {
 	struct zone *zone;
 	int i;
 	int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+	int changes = 0;
 
 	for_each_populated_zone(zone) {
 		struct per_cpu_pageset __percpu *p = zone->pageset;
@@ -486,15 +500,17 @@ static void refresh_cpu_vm_stats(void)
 			continue;
 		}
 
-
 		if (__this_cpu_dec_return(p->expire))
 			continue;
 
-		if (__this_cpu_read(p->pcp.count))
+		if (__this_cpu_read(p->pcp.count)) {
 			drain_zone_pages(zone, this_cpu_ptr(&p->pcp));
+			changes++;
+		}
 #endif
 	}
-	fold_diff(global_diff);
+	changes += fold_diff(global_diff);
+	return changes;
 }
 
 /*
@@ -1239,20 +1255,108 @@ static const struct file_operations proc_vmstat_file_operations = {
 #ifdef CONFIG_SMP
 static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
 int sysctl_stat_interval __read_mostly = HZ;
+static cpumask_var_t cpu_stat_off;
 
 static void vmstat_update(struct work_struct *w)
 {
-	refresh_cpu_vm_stats();
-	schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+	if (refresh_cpu_vm_stats())
+		/*
+		 * Counters were updated so we expect more updates
+		 * to occur in the future. Keep on running the
+		 * update worker thread.
+		 */
+		schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+			round_jiffies_relative(sysctl_stat_interval));
+	else {
+		/*
+		 * We did not update any counters so the app may be in
+		 * a mode where it does not cause counter updates.
+		 * We may be uselessly running vmstat_update.
+		 * Defer the checking for differentials to the
+		 * shepherd thread on a different processor.
+		 */
+		int r;
+		/*
+		 * The shepherd work thread does not race since it never
+		 * changes the bit if it is zero, but the cpu
+		 * online / offline code may race if
+		 * worker threads are still allowed during
+		 * shutdown / startup.
+		 */
+		r = cpumask_test_and_set_cpu(smp_processor_id(),
+			cpu_stat_off);
+		VM_BUG_ON(r);
+	}
+}
+
+/*
+ * Check if the diffs for a certain cpu indicate that
+ * an update is needed.
+ */
+static bool need_update(int cpu)
+{
+	struct zone *zone;
+
+	for_each_populated_zone(zone) {
+		struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
+
+		BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
+		/*
+		 * The fast way of checking if there are any vmstat diffs.
+		 * This works because the diffs are byte sized items.
+		 */
+		if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS))
+			return true;
+
+	}
+	return false;
+}
+
+
+/*
+ * Shepherd worker thread that checks the
+ * differentials of processors whose vmstat worker
+ * threads have been disabled because of
+ * inactivity.
+ */
+static void vmstat_shepherd(struct work_struct *w);
+
+static DECLARE_DELAYED_WORK(shepherd, vmstat_shepherd);
+
+static void vmstat_shepherd(struct work_struct *w)
+{
+	int cpu;
+
+	get_online_cpus();
+	/* Check processors whose vmstat worker threads have been disabled */
+	for_each_cpu(cpu, cpu_stat_off)
+		if (need_update(cpu) &&
+			cpumask_test_and_clear_cpu(cpu, cpu_stat_off))
+
+			schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu),
+				__round_jiffies_relative(sysctl_stat_interval, cpu));
+
+	put_online_cpus();
+
+	schedule_delayed_work(&shepherd,
 		round_jiffies_relative(sysctl_stat_interval));
+
 }
 
-static void start_cpu_timer(int cpu)
+static void __init start_shepherd_timer(void)
 {
-	struct delayed_work *work = &per_cpu(vmstat_work, cpu);
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu),
+			vmstat_update);
+
+	if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL))
+		BUG();
+	cpumask_copy(cpu_stat_off, cpu_online_mask);
 
-	INIT_DEFERRABLE_WORK(work, vmstat_update);
-	schedule_delayed_work_on(cpu, work, __round_jiffies_relative(HZ, cpu));
+	schedule_delayed_work(&shepherd,
+		round_jiffies_relative(sysctl_stat_interval));
 }
 
 static void vmstat_cpu_dead(int node)
@@ -1283,17 +1387,17 @@ static int vmstat_cpuup_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		refresh_zone_stat_thresholds();
-		start_cpu_timer(cpu);
 		node_set_state(cpu_to_node(cpu), N_CPU);
+		cpumask_set_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu));
-		per_cpu(vmstat_work, cpu).work.func = NULL;
+		cpumask_clear_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		start_cpu_timer(cpu);
+		cpumask_set_cpu(cpu, cpu_stat_off);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
@@ -1313,15 +1417,10 @@ static struct notifier_block vmstat_notifier =
 static int __init setup_vmstat(void)
 {
 #ifdef CONFIG_SMP
-	int cpu;
-
 	cpu_notifier_register_begin();
 	__register_cpu_notifier(&vmstat_notifier);
 
-	for_each_online_cpu(cpu) {
-		start_cpu_timer(cpu);
-		node_set_state(cpu_to_node(cpu), N_CPU);
-	}
+	start_shepherd_timer();
 	cpu_notifier_register_done();
 #endif
 #ifdef CONFIG_PROC_FS
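
The need_update() check added above leans on memchr_inv(), which returns a pointer to the first byte in a buffer that differs from a given value, or NULL when every byte matches; because the vm_stat_diff[] entries are byte sized, a single scan decides whether a cpu still has pending differentials. As a minimal illustration of that idea outside the kernel, the sketch below re-implements the scan by hand; memchr_inv_sketch, need_update_sketch and NR_ITEMS are illustrative names only, not kernel code, and the real memchr_inv in lib/string.c is word-at-a-time optimized.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Simplified stand-in for the kernel's memchr_inv(): return a pointer to
 * the first byte that is NOT equal to 'c', or NULL if all bytes match.
 */
static const void *memchr_inv_sketch(const void *start, int c, size_t bytes)
{
	const unsigned char *p = start;
	size_t i;

	for (i = 0; i < bytes; i++)
		if (p[i] != (unsigned char)c)
			return p + i;
	return NULL;
}

/* Hypothetical per-cpu differential array, byte sized like vm_stat_diff[]. */
#define NR_ITEMS 32

static bool need_update_sketch(const signed char diff[NR_ITEMS])
{
	/* One scan decides whether this cpu still needs its vmstat worker. */
	return memchr_inv_sketch(diff, 0, NR_ITEMS) != NULL;
}

int main(void)
{
	signed char diff[NR_ITEMS] = { 0 };

	printf("idle cpu:   %d\n", need_update_sketch(diff));	/* prints 0 */
	diff[7] = -3;	/* pretend one counter drifted on this cpu */
	printf("active cpu: %d\n", need_update_sketch(diff));	/* prints 1 */
	return 0;
}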