Browse Source

[NET_SCHED]: Use ktime as clocksource

Get rid of the manual clock source selection mess and use ktime. Also
use a scalar representation, which allows cleaning up pkt_sched.h a bit
more and results in fewer ktime_to_ns() calls in most cases.

The PSCHED_US2JIFFIE/PSCHED_JIFFIE2US macros are implemented quite
inefficiently by this patch; following patches will convert all qdiscs
to hrtimers and get rid of them entirely.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Patrick McHardy 19 years ago
parent
commit
641b9e0e8b
5 changed files with 19 additions and 315 deletions
  1. 13 156
      include/net/pkt_sched.h
  2. 1 0
      kernel/hrtimer.c
  3. 0 56
      net/sched/Kconfig
  4. 2 75
      net/sched/sch_api.c
  5. 3 28
      net/sched/sch_hfsc.c

+ 13 - 156
include/net/pkt_sched.h

@@ -2,6 +2,7 @@
 #define __NET_PKT_SCHED_H
 #define __NET_PKT_SCHED_H
 
 
 #include <linux/jiffies.h>
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <net/sch_generic.h>
 #include <net/sch_generic.h>
 
 
 struct qdisc_walker
 struct qdisc_walker
@@ -37,176 +38,32 @@ static inline void *qdisc_priv(struct Qdisc *q)
    The things are not so bad, because we may use artifical
    The things are not so bad, because we may use artifical
    clock evaluated by integration of network data flow
    clock evaluated by integration of network data flow
    in the most critical places.
    in the most critical places.
-
-   Note: we do not use fastgettimeofday.
-   The reason is that, when it is not the same thing as
-   gettimeofday, it returns invalid timestamp, which is
-   not updated, when net_bh is active.
  */
  */
 
 
-/* General note about internal clock.
-
-   Any clock source returns time intervals, measured in units
-   close to 1usec. With source CONFIG_NET_SCH_CLK_GETTIMEOFDAY it is precisely
-   microseconds, otherwise something close but different chosen to minimize
-   arithmetic cost. Ratio usec/internal untis in form nominator/denominator
-   may be read from /proc/net/psched.
- */
-
-
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-
-typedef struct timeval	psched_time_t;
-typedef long		psched_tdiff_t;
-
-#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
-#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
-#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
-
-#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
 typedef u64	psched_time_t;
 typedef u64	psched_time_t;
 typedef long	psched_tdiff_t;
 typedef long	psched_tdiff_t;
 
 
-#ifdef CONFIG_NET_SCH_CLK_JIFFIES
-
-#if HZ < 96
-#define PSCHED_JSCALE 14
-#elif HZ >= 96 && HZ < 192
-#define PSCHED_JSCALE 13
-#elif HZ >= 192 && HZ < 384
-#define PSCHED_JSCALE 12
-#elif HZ >= 384 && HZ < 768
-#define PSCHED_JSCALE 11
-#elif HZ >= 768
-#define PSCHED_JSCALE 10
-#endif
-
-#define PSCHED_GET_TIME(stamp) ((stamp) = (get_jiffies_64()<<PSCHED_JSCALE))
-#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
-#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
-
-#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
-#ifdef CONFIG_NET_SCH_CLK_CPU
-#include <asm/timex.h>
-
-extern psched_tdiff_t psched_clock_per_hz;
-extern int psched_clock_scale;
-extern psched_time_t psched_time_base;
-extern cycles_t psched_time_mark;
-
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	cycles_t cur = get_cycles();					\
-	if (sizeof(cycles_t) == sizeof(u32)) {				\
-		if (cur <= psched_time_mark)				\
-			psched_time_base += 0x100000000ULL;		\
-		psched_time_mark = cur;					\
-		(stamp) = (psched_time_base + cur)>>psched_clock_scale;	\
-	} else {							\
-		(stamp) = cur>>psched_clock_scale;			\
-	}								\
-} while (0)
-#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
-#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
-
-#endif /* CONFIG_NET_SCH_CLK_CPU */
-
-#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#define PSCHED_TDIFF(tv1, tv2) \
-({ \
-	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
-	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
-	   if (__delta_sec) { \
-	           switch (__delta_sec) { \
-		   default: \
-			   __delta = 0; \
-		   case 2: \
-			   __delta += USEC_PER_SEC; \
-		   case 1: \
-			   __delta += USEC_PER_SEC; \
-	           } \
-	   } \
-	   __delta; \
-})
-
-static inline int
-psched_tod_diff(int delta_sec, int bound)
-{
-	int delta;
-
-	if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
-		return bound;
-	delta = delta_sec * USEC_PER_SEC;
-	if (delta > bound || delta < 0)
-		delta = bound;
-	return delta;
-}
-
-#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-({ \
-	   int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
-	   int __delta = (tv1).tv_usec - (tv2).tv_usec; \
-	   switch (__delta_sec) { \
-	   default: \
-		   __delta = psched_tod_diff(__delta_sec, bound);  break; \
-	   case 2: \
-		   __delta += USEC_PER_SEC; \
-	   case 1: \
-		   __delta += USEC_PER_SEC; \
-	   case 0: \
- 		   if (__delta > bound || __delta < 0) \
- 			__delta = bound; \
-	   } \
-	   __delta; \
-})
-
-#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
-				(tv1).tv_sec <= (tv2).tv_sec) || \
-				 (tv1).tv_sec < (tv2).tv_sec)
-
-#define PSCHED_TADD2(tv, delta, tv_res) \
-({ \
-	   int __delta = (tv).tv_usec + (delta); \
-	   (tv_res).tv_sec = (tv).tv_sec; \
-	   while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
-	   (tv_res).tv_usec = __delta; \
-})
-
-#define PSCHED_TADD(tv, delta) \
-({ \
-	   (tv).tv_usec += (delta); \
-	   while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
-		 (tv).tv_usec -= USEC_PER_SEC; } \
-})
-
-/* Set/check that time is in the "past perfect";
-   it depends on concrete representation of system time
- */
-
-#define PSCHED_SET_PASTPERFECT(t)	((t).tv_sec = 0)
-#define PSCHED_IS_PASTPERFECT(t)	((t).tv_sec == 0)
+/* Avoid doing 64 bit divide by 1000 */
+#define PSCHED_US2NS(x)			((s64)(x) << 10)
+#define PSCHED_NS2US(x)			((x) >> 10)
 
 
-#define	PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
+#define PSCHED_TICKS_PER_SEC		PSCHED_NS2US(NSEC_PER_SEC)
+#define PSCHED_GET_TIME(stamp) \
+	((stamp) = PSCHED_NS2US(ktime_to_ns(ktime_get())))
 
 
-#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
+#define PSCHED_US2JIFFIE(usecs)		usecs_to_jiffies(PSCHED_US2NS((usecs)) / NSEC_PER_USEC)
+#define PSCHED_JIFFIE2US(delay)		PSCHED_NS2US(jiffies_to_usecs((delay)) * NSEC_PER_USEC)
 
 
-#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
+#define PSCHED_TDIFF(tv1, tv2)		(long)((tv1) - (tv2))
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
 #define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-	min_t(long long, (tv1) - (tv2), bound)
-
-
-#define PSCHED_TLESS(tv1, tv2) ((tv1) < (tv2))
+					min_t(long long, (tv1) - (tv2), bound)
+#define PSCHED_TLESS(tv1, tv2)		((tv1) < (tv2))
 #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
 #define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = (tv) + (delta))
-#define PSCHED_TADD(tv, delta) ((tv) += (delta))
+#define PSCHED_TADD(tv, delta)		((tv) += (delta))
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_SET_PASTPERFECT(t)	((t) = 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 #define PSCHED_IS_PASTPERFECT(t)	((t) == 0)
 #define	PSCHED_AUDIT_TDIFF(t)
 #define	PSCHED_AUDIT_TDIFF(t)
 
 
-#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops pfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;
 extern struct Qdisc_ops bfifo_qdisc_ops;
 
 

+ 1 - 0
kernel/hrtimer.c

@@ -59,6 +59,7 @@ ktime_t ktime_get(void)
 
 
 	return timespec_to_ktime(now);
 	return timespec_to_ktime(now);
 }
 }
+EXPORT_SYMBOL_GPL(ktime_get);
 
 
 /**
 /**
  * ktime_get_real - get the real (wall-) time in ktime_t format
  * ktime_get_real - get the real (wall-) time in ktime_t format

+ 0 - 56
net/sched/Kconfig

@@ -46,62 +46,6 @@ config NET_SCH_FIFO
 
 
 if NET_SCHED
 if NET_SCHED
 
 
-choice
-	prompt "Packet scheduler clock source"
-	default NET_SCH_CLK_GETTIMEOFDAY
-	---help---
-	  Packet schedulers need a monotonic clock that increments at a static
-	  rate. The kernel provides several suitable interfaces, each with
-	  different properties:
-	  
-	  - high resolution (us or better)
-	  - fast to read (minimal locking, no i/o access)
-	  - synchronized on all processors
-	  - handles cpu clock frequency changes
-
-	  but nothing provides all of the above.
-
-config NET_SCH_CLK_JIFFIES
-	bool "Timer interrupt"
-	---help---
-	  Say Y here if you want to use the timer interrupt (jiffies) as clock
-	  source. This clock source is fast, synchronized on all processors and
-	  handles cpu clock frequency changes, but its resolution is too low
-	  for accurate shaping except at very low speed.
-
-config NET_SCH_CLK_GETTIMEOFDAY
-	bool "gettimeofday"
-	---help---
-	  Say Y here if you want to use gettimeofday as clock source. This clock
-	  source has high resolution, is synchronized on all processors and
-	  handles cpu clock frequency changes, but it is slow.
-
-	  Choose this if you need a high resolution clock source but can't use
-	  the CPU's cycle counter.
-
-# don't allow on SMP x86 because they can have unsynchronized TSCs.
-# gettimeofday is a good alternative
-config NET_SCH_CLK_CPU
-	bool "CPU cycle counter"
-	depends on ((X86_TSC || X86_64) && !SMP) || ALPHA || SPARC64 || PPC64 || IA64
-	---help---
-	  Say Y here if you want to use the CPU's cycle counter as clock source.
-	  This is a cheap and high resolution clock source, but on some
-	  architectures it is not synchronized on all processors and doesn't
-	  handle cpu clock frequency changes.
-
-	  The useable cycle counters are:
-
-	  	x86/x86_64	- Timestamp Counter
-		alpha		- Cycle Counter
-		sparc64		- %ticks register
-		ppc64		- Time base
-		ia64		- Interval Time Counter
-
-	  Choose this if your CPU's cycle counter is working properly.
-
-endchoice
-
 comment "Queueing/Scheduling"
 comment "Queueing/Scheduling"
 
 
 config NET_SCH_CBQ
 config NET_SCH_CBQ

+ 2 - 75
net/sched/sch_api.c

@@ -1175,15 +1175,12 @@ reclassify:
 	return -1;
 	return -1;
 }
 }
 
 
-static int psched_us_per_tick = 1;
-static int psched_tick_per_us = 1;
-
 #ifdef CONFIG_PROC_FS
 #ifdef CONFIG_PROC_FS
 static int psched_show(struct seq_file *seq, void *v)
 static int psched_show(struct seq_file *seq, void *v)
 {
 {
 	seq_printf(seq, "%08x %08x %08x %08x\n",
 	seq_printf(seq, "%08x %08x %08x %08x\n",
-		      psched_tick_per_us, psched_us_per_tick,
-		      1000000, HZ);
+		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
+		   1000000, HZ);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -1202,80 +1199,10 @@ static const struct file_operations psched_fops = {
 };
 };
 #endif
 #endif
 
 
-#ifdef CONFIG_NET_SCH_CLK_CPU
-psched_tdiff_t psched_clock_per_hz;
-int psched_clock_scale;
-EXPORT_SYMBOL(psched_clock_per_hz);
-EXPORT_SYMBOL(psched_clock_scale);
-
-psched_time_t psched_time_base;
-cycles_t psched_time_mark;
-EXPORT_SYMBOL(psched_time_mark);
-EXPORT_SYMBOL(psched_time_base);
-
-/*
- * Periodically adjust psched_time_base to avoid overflow
- * with 32-bit get_cycles(). Safe up to 4GHz CPU.
- */
-static void psched_tick(unsigned long);
-static DEFINE_TIMER(psched_timer, psched_tick, 0, 0);
-
-static void psched_tick(unsigned long dummy)
-{
-	if (sizeof(cycles_t) == sizeof(u32)) {
-		psched_time_t dummy_stamp;
-		PSCHED_GET_TIME(dummy_stamp);
-		psched_timer.expires = jiffies + 1*HZ;
-		add_timer(&psched_timer);
-	}
-}
-
-int __init psched_calibrate_clock(void)
-{
-	psched_time_t stamp, stamp1;
-	struct timeval tv, tv1;
-	psched_tdiff_t delay;
-	long rdelay;
-	unsigned long stop;
-
-	psched_tick(0);
-	stop = jiffies + HZ/10;
-	PSCHED_GET_TIME(stamp);
-	do_gettimeofday(&tv);
-	while (time_before(jiffies, stop)) {
-		barrier();
-		cpu_relax();
-	}
-	PSCHED_GET_TIME(stamp1);
-	do_gettimeofday(&tv1);
-
-	delay = PSCHED_TDIFF(stamp1, stamp);
-	rdelay = tv1.tv_usec - tv.tv_usec;
-	rdelay += (tv1.tv_sec - tv.tv_sec)*1000000;
-	if (rdelay > delay)
-		return -1;
-	delay /= rdelay;
-	psched_tick_per_us = delay;
-	while ((delay>>=1) != 0)
-		psched_clock_scale++;
-	psched_us_per_tick = 1<<psched_clock_scale;
-	psched_clock_per_hz = (psched_tick_per_us*(1000000/HZ))>>psched_clock_scale;
-	return 0;
-}
-#endif
-
 static int __init pktsched_init(void)
 static int __init pktsched_init(void)
 {
 {
 	struct rtnetlink_link *link_p;
 	struct rtnetlink_link *link_p;
 
 
-#ifdef CONFIG_NET_SCH_CLK_CPU
-	if (psched_calibrate_clock() < 0)
-		return -1;
-#elif defined(CONFIG_NET_SCH_CLK_JIFFIES)
-	psched_tick_per_us = HZ<<PSCHED_JSCALE;
-	psched_us_per_tick = 1000000;
-#endif
-
 	link_p = rtnetlink_links[PF_UNSPEC];
 	link_p = rtnetlink_links[PF_UNSPEC];
 
 
 	/* Setup rtnetlink links. It is made here to avoid
 	/* Setup rtnetlink links. It is made here to avoid

+ 3 - 28
net/sched/sch_hfsc.c

@@ -195,20 +195,6 @@ struct hfsc_sched
 	struct timer_list wd_timer;		/* watchdog timer */
 	struct timer_list wd_timer;		/* watchdog timer */
 };
 };
 
 
-/*
- * macros
- */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#include <linux/time.h>
-#undef PSCHED_GET_TIME
-#define PSCHED_GET_TIME(stamp)						\
-do {									\
-	struct timeval tv;						\
-	do_gettimeofday(&tv);						\
-	(stamp) = 1ULL * USEC_PER_SEC * tv.tv_sec + tv.tv_usec;		\
-} while (0)
-#endif
-
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
 #define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */
 
 
 
 
@@ -394,28 +380,17 @@ cftree_update(struct hfsc_class *cl)
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	ism: (psched_us/byte) << ISM_SHIFT
  *	dx: psched_us
  *	dx: psched_us
  *
  *
- * Clock source resolution (CONFIG_NET_SCH_CLK_*)
- *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
- *  CPU: resolution is between 0.5us and 1us.
- *  GETTIMEOFDAY: resolution is exactly 1us.
+ * The clock source resolution with ktime is 1.024us.
  *
  *
  * sm and ism are scaled in order to keep effective digits.
  * sm and ism are scaled in order to keep effective digits.
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
  * digits in decimal using the following table.
  * digits in decimal using the following table.
  *
  *
- * Note: We can afford the additional accuracy (altq hfsc keeps at most
- * 3 effective digits) thanks to the fact that linux clock is bounded
- * much more tightly.
- *
  *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
  *  ------------+-------------------------------------------------------
  *  ------------+-------------------------------------------------------
- *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-e    62500e-3
- *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
- *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
+ *  bytes/1.024us 12.8e-3    128e-3     1280e-3    12800e-3   128000e-3
  *
  *
- *  0.5us/byte    160        16         1.6        0.16       0.016
- *  us/byte       80         8          0.8        0.08       0.008
- *  1.27us/byte   63         6.3        0.63       0.063      0.0063
+ *  1.024us/byte  78.125     7.8125     0.78125    0.078125   0.0078125
  */
  */
 #define	SM_SHIFT	20
 #define	SM_SHIFT	20
 #define	ISM_SHIFT	18
 #define	ISM_SHIFT	18