@@ -1,407 +0,0 @@
-/*
- * Floating proportions
- *
- * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
- *
- * Description:
- *
- * The floating proportion is a time derivative with an exponentially decaying
- * history:
- *
- *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
- *
- * Where j is an element from {prop_local}, x_{j} is j's number of events,
- * and i the time period over which the differential is taken. So d/dt_{-i} is
- * the differential over the i-th last period.
- *
- * The decaying history gives smooth transitions. The time differential carries
- * the notion of speed.
- *
- * The denominator is 2^(1+i) because we want the series to be normalised, ie.
- *
- *   \Sum_{i=0} 1/2^(1+i) = 1
- *
- * Furthermore, if we measure time (t) in the same events as x, so that:
- *
- *   t = \Sum_{j} x_{j}
- *
- * we get that:
- *
- *   \Sum_{j} p_{j} = 1
- *
- * Writing this in an iterative fashion we get (dropping the 'd's):
- *
- *   if (++x_{j}, ++t > period)
- *	t /= 2;
- *	for_each (j)
- *		x_{j} /= 2;
- *
- * so that:
- *
- *   p_{j} = x_{j} / t;
- *
- * We optimize away the '/= 2' for the global time delta by noting that:
- *
- *   if (++t > period) t /= 2;
- *
- * can be approximated by:
- *
- *   period/2 + (++t % period/2)
- *
- * [ Furthermore, when we choose period to be 2^n it can be written in terms of
- *   binary operations and wraparound artefacts disappear. ]
- *
- * Also note that this yields a natural counter of the elapsed periods:
- *
- *   c = t / (period/2)
- *
- * [ Its monotonically increasing property can be applied to mitigate the
- *   wraparound issue. ]
- *
- * This allows us to do away with the loop over all prop_locals on each period
- * expiration. By remembering the period count under which it was last accessed
- * as c_{j}, we can obtain the number of 'missed' cycles from:
- *
- *   c - c_{j}
- *
- * We can then lazily catch up to the global period count every time we are
- * going to use x_{j}, by doing:
- *
- *   x_{j} /= 2^(c - c_{j}), c_{j} = c
- */
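The bookkeeping above is easiest to see end to end in a small userspace sketch. The program below is illustrative only, not part of this file: every `demo_*` name is made up. One global counter plays the role of t, two locals play x_{j}, and each access performs the lazy catch-up x_{j} /= 2^(c - c_{j}).

#include <stdio.h>

#define SHIFT		4			/* period = 2^4 = 16 events */
#define PERIOD_HALF	(1UL << (SHIFT - 1))	/* period/2 */

struct demo_local {
	unsigned long events;	/* x_{j} */
	unsigned long period;	/* c_{j}: period count at last access */
};

static unsigned long t;		/* global event count */

/* x_{j} /= 2^(c - c_{j}), c_{j} = c -- the lazy catch-up */
static void demo_norm(struct demo_local *l)
{
	unsigned long c = t / PERIOD_HALF;
	unsigned long missed = c - l->period;

	if (missed < 8 * sizeof(unsigned long))
		l->events >>= missed;
	else
		l->events = 0;
	l->period = c;
}

/* ++x_{j}, ++t */
static void demo_inc(struct demo_local *l)
{
	demo_norm(l);
	l->events++;
	t++;
}

/* p_{j} = x_{j} / (period/2 + t % period/2) */
static void demo_fraction(struct demo_local *l, long *num, long *den)
{
	demo_norm(l);
	*num = l->events;
	*den = PERIOD_HALF + (t % PERIOD_HALF);
}

int main(void)
{
	struct demo_local a = { 0, 0 }, b = { 0, 0 };
	long num, den;
	int i;

	for (i = 0; i < 1000; i++)
		demo_inc(i % 4 ? &a : &b);	/* a takes 3 of every 4 events */

	demo_fraction(&a, &num, &den);
	printf("a: %ld/%ld\n", num, den);	/* roughly 3/4 */
	demo_fraction(&b, &num, &den);
	printf("b: %ld/%ld\n", num, den);	/* roughly 1/4 */
	return 0;
}

Because the series is normalised, the two printed fractions sum to (approximately) one, and a burst of events shifts the proportions smoothly rather than instantly.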
-
-#include <linux/proportions.h>
-#include <linux/rcupdate.h>
-
-int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp)
-{
-	int err;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	pd->index = 0;
-	pd->pg[0].shift = shift;
-	mutex_init(&pd->mutex);
-	err = percpu_counter_init(&pd->pg[0].events, 0, gfp);
-	if (err)
-		goto out;
-
-	err = percpu_counter_init(&pd->pg[1].events, 0, gfp);
-	if (err)
-		percpu_counter_destroy(&pd->pg[0].events);
-
-out:
-	return err;
-}
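For reference, a caller owns one descriptor per proportion set and initializes it once at subsystem setup. A minimal sketch follows; the `vm_completions` name and the shift value are illustrative (loosely following how the writeback code used this interface), not taken from this file.

static struct prop_descriptor vm_completions;	/* illustrative name */

static int __init demo_subsys_init(void)
{
	/*
	 * shift selects the period (2^shift events); values above
	 * PROP_MAX_SHIFT are clamped by prop_descriptor_init().
	 */
	return prop_descriptor_init(&vm_completions, 10, GFP_KERNEL);
}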
-
-/*
- * We have two copies, and flip between them to make it seem like an atomic
- * update. The update is not really atomic wrt the events counter, but
- * it is internally consistent with the bit layout depending on shift.
- *
- * We copy the events count, move the bits around and flip the index.
- */
-void prop_change_shift(struct prop_descriptor *pd, int shift)
-{
-	int index;
-	int offset;
-	u64 events;
-	unsigned long flags;
-
-	if (shift > PROP_MAX_SHIFT)
-		shift = PROP_MAX_SHIFT;
-
-	mutex_lock(&pd->mutex);
-
-	index = pd->index ^ 1;
-	offset = pd->pg[pd->index].shift - shift;
-	if (!offset)
-		goto out;
-
-	pd->pg[index].shift = shift;
-
-	local_irq_save(flags);
-	events = percpu_counter_sum(&pd->pg[pd->index].events);
-	if (offset < 0)
-		events <<= -offset;
-	else
-		events >>= offset;
-	percpu_counter_set(&pd->pg[index].events, events);
-
-	/*
-	 * ensure the new pg is fully written before the switch
-	 */
-	smp_wmb();
-	pd->index = index;
-	local_irq_restore(flags);
-
-	synchronize_rcu();
-
-out:
-	mutex_unlock(&pd->mutex);
-}
-
-/*
- * Wrap the access to the data in an rcu_read_lock() section;
- * this is used to track the active references.
- */
-static struct prop_global *prop_get_global(struct prop_descriptor *pd)
-__acquires(RCU)
-{
-	int index;
-
-	rcu_read_lock();
-	index = pd->index;
-	/*
-	 * match the wmb from prop_change_shift()
-	 */
-	smp_rmb();
-	return &pd->pg[index];
-}
-
-static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
-__releases(RCU)
-{
-	rcu_read_unlock();
-}
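Every accessor below brackets its use of the live prop_global with this pair; the rcu_read_lock() section is what the synchronize_rcu() in prop_change_shift() waits out before the old copy may be rewritten. A schematic reader (the `demo_read_events` name is hypothetical):

static s64 demo_read_events(struct prop_descriptor *pd)
{
	struct prop_global *pg = prop_get_global(pd);
	s64 events;

	/*
	 * The pg we picked stays consistent for the duration of the RCU
	 * section, even if pd->index flips concurrently.
	 */
	events = percpu_counter_read(&pg->events);

	prop_put_global(pd, pg);
	return events;
}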
-
-static void
-prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
-{
-	int offset = *pl_shift - new_shift;
-
-	if (!offset)
-		return;
-
-	if (offset < 0)
-		*pl_period <<= -offset;
-	else
-		*pl_period >>= offset;
-
-	*pl_shift = new_shift;
-}
-
-/*
- * PERCPU
- */
-
-#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
-
-int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	return percpu_counter_init(&pl->events, 0, gfp);
-}
-
-void prop_local_destroy_percpu(struct prop_local_percpu *pl)
-{
-	percpu_counter_destroy(&pl->events);
-}
-
-/*
- * Catch up with missed period expirations.
- *
- *   until (c_{j} == c)
- *     x_{j} -= x_{j}/2;
- *     c_{j}++;
- */
-static
-void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-
-	/*
-	 * For each missed period, we halve the local counter;
-	 * basically:
-	 *   pl->events >> (global_period - pl->period);
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (period < BITS_PER_LONG) {
-		s64 val = percpu_counter_read(&pl->events);
-
-		if (val < (nr_cpu_ids * PROP_BATCH))
-			val = percpu_counter_sum(&pl->events);
-
-		__percpu_counter_add(&pl->events, -val + (val >> period),
-				     PROP_BATCH);
-	} else
-		percpu_counter_set(&pl->events, 0);
-
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
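A worked instance of the catch-up arithmetic, with assumed values:

/*
 * Assume pg->shift == 4, so
 *
 *	period      = 1UL << 3 = 8	(this is period/2 in events)
 *	period_mask = ~7
 *
 * If percpu_counter_read(&pg->events) returns 27, then
 *
 *	global_period = 27 & ~7 = 24
 *
 * and with pl->period == 8 we get (24 - 8) >> 3 == 2 missed periods,
 * so the local count is scaled by 1/2^2: val -> val >> 2.
 */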
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * Identical to __prop_inc_percpu, except that it limits this pl's fraction to
- * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
- */
-void __prop_inc_percpu_max(struct prop_descriptor *pd,
-			   struct prop_local_percpu *pl, long frac)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_percpu(pg, pl);
-
-	if (unlikely(frac != PROP_FRAC_BASE)) {
-		unsigned long period_2 = 1UL << (pg->shift - 1);
-		unsigned long counter_mask = period_2 - 1;
-		unsigned long global_count;
-		long numerator, denominator;
-
-		numerator = percpu_counter_read_positive(&pl->events);
-		global_count = percpu_counter_read(&pg->events);
-		denominator = period_2 + (global_count & counter_mask);
-
-		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
-			goto out_put;
-	}
-
-	percpu_counter_add(&pl->events, 1);
-	percpu_counter_add(&pg->events, 1);
-
-out_put:
-	prop_put_global(pd, pg);
-}
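Plugging in assumed numbers makes the cap concrete:

/*
 * Assume pg->shift == 4 and frac == PROP_FRAC_BASE / 4 (a quarter cap).
 *
 *	period_2     = 8
 *	global_count = 27  ->  denominator = 8 + (27 & 7) = 11
 *	limit        = (11 * frac) >> PROP_FRAC_SHIFT = 11 / 4 = 2
 *
 * so the event is only counted while this pl's numerator is at most 2,
 * keeping its fraction at roughly a quarter of the whole.
 */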
-
-/*
- * Obtain a fraction of this proportion:
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_percpu(struct prop_descriptor *pd,
-			  struct prop_local_percpu *pl,
-			  long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_percpu(pg, pl);
-	*numerator = percpu_counter_read_positive(&pl->events);
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}
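A caller turns the returned fraction into a share of some budget by straightforward scaling. A minimal sketch with a hypothetical helper (loosely modeled on how the writeback code apportioned its dirty limit; `demo_share` is not part of this file):

static unsigned long demo_share(struct prop_descriptor *pd,
				struct prop_local_percpu *pl,
				unsigned long total)
{
	long numerator, denominator;

	prop_fraction_percpu(pd, pl, &numerator, &denominator);

	/*
	 * total * p_{j}; denominator is at least period/2, never zero.
	 * div64_u64() is from <linux/math64.h>.
	 */
	return div64_u64((u64)total * numerator, denominator);
}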
-
-/*
- * SINGLE
- */
-
-int prop_local_init_single(struct prop_local_single *pl)
-{
-	raw_spin_lock_init(&pl->lock);
-	pl->shift = 0;
-	pl->period = 0;
-	pl->events = 0;
-	return 0;
-}
-
-void prop_local_destroy_single(struct prop_local_single *pl)
-{
-}
-
-/*
- * Catch up with missed period expirations.
- */
-static
-void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
-{
-	unsigned long period = 1UL << (pg->shift - 1);
-	unsigned long period_mask = ~(period - 1);
-	unsigned long global_period;
-	unsigned long flags;
-
-	global_period = percpu_counter_read(&pg->events);
-	global_period &= period_mask;
-
-	/*
-	 * Fast path - check if the local and global period count still match
-	 * outside of the lock.
-	 */
-	if (pl->period == global_period)
-		return;
-
-	raw_spin_lock_irqsave(&pl->lock, flags);
-	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
-	/*
-	 * For each missed period, we halve the local counter.
-	 */
-	period = (global_period - pl->period) >> (pg->shift - 1);
-	if (likely(period < BITS_PER_LONG))
-		pl->events >>= period;
-	else
-		pl->events = 0;
-	pl->period = global_period;
-	raw_spin_unlock_irqrestore(&pl->lock, flags);
-}
-
-/*
- * ++x_{j}, ++t
- */
-void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
-{
-	struct prop_global *pg = prop_get_global(pd);
-
-	prop_norm_single(pg, pl);
-	pl->events++;
-	percpu_counter_add(&pg->events, 1);
-	prop_put_global(pd, pg);
-}
-
-/*
- * Obtain a fraction of this proportion:
- *
- *   p_{j} = x_{j} / (period/2 + t % period/2)
- */
-void prop_fraction_single(struct prop_descriptor *pd,
-			  struct prop_local_single *pl,
-			  long *numerator, long *denominator)
-{
-	struct prop_global *pg = prop_get_global(pd);
-	unsigned long period_2 = 1UL << (pg->shift - 1);
-	unsigned long counter_mask = period_2 - 1;
-	unsigned long global_count;
-
-	prop_norm_single(pg, pl);
-	*numerator = pl->events;
-
-	global_count = percpu_counter_read(&pg->events);
-	*denominator = period_2 + (global_count & counter_mask);
-
-	prop_put_global(pd, pg);
-}