Browse Source

Merge branch 'x86-idle-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 idle updates from Ingo Molnar:
 "There were two bigger changes in this development cycle:

   - remove idle notifiers:

       32 files changed, 74 insertions(+), 803 deletions(-)

     These notifiers were of questionable value and the main use case,
     the i7300 driver, was essentially unmaintained and can be removed,
     plus modern power management concepts don't need the callback - so
     use this golden opportunity and get rid of this opaque and fragile
     callback from a latency sensitive code path.

     (Len Brown, Thomas Gleixner)

   - improve the AMD Erratum 400 workaround that used high-overhead MSR
     polling in the idle loop (Borislav Petkov, Thomas Gleixner)"

* 'x86-idle-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Remove empty idle.h header
  x86/amd: Simplify AMD E400 aware idle routine
  x86/amd: Check for the C1E bug post ACPI subsystem init
  x86/bugs: Separate AMD E400 erratum and C1E bug
  x86/cpufeature: Provide helper to set bugs bits
  x86/idle: Remove enter_idle(), exit_idle()
  x86: Remove x86_test_and_clear_bit_percpu()
  x86/idle: Remove is_idle flag
  x86/idle: Remove idle_notifier
  i7300_idle: Remove this driver
Linus Torvalds 8 years ago
parent
commit
212f30008a

+ 0 - 6
MAINTAINERS

@@ -6104,12 +6104,6 @@ S:	Maintained
 F:	Documentation/cdrom/ide-cd
 F:	drivers/ide/ide-cd*
 
-IDLE-I7300
-M:	Andy Henroid <andrew.d.henroid@intel.com>
-L:	linux-pm@vger.kernel.org
-S:	Supported
-F:	drivers/idle/i7300_idle.c
-
 IEEE 802.15.4 SUBSYSTEM
 M:	Alexander Aring <aar@pengutronix.de>
 M:	Stefan Schmidt <stefan@osg.samsung.com>

+ 1 - 1
arch/x86/include/asm/acpi.h

@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
 	    boot_cpu_data.x86_model <= 0x05 &&
 	    boot_cpu_data.x86_mask < 0x0A)
 		return 1;
-	else if (amd_e400_c1e_detected)
+	else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
 		return 1;
 	else
 		return max_cstate;

+ 0 - 2
arch/x86/include/asm/apic.h

@@ -11,7 +11,6 @@
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #include <asm/msr.h>
-#include <asm/idle.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3	1
 
@@ -640,7 +639,6 @@ extern void irq_exit(void);
 static inline void entering_irq(void)
 {
 	irq_enter();
-	exit_idle();
 }
 
 static inline void entering_ack_irq(void)

+ 1 - 0
arch/x86/include/asm/cpufeature.h

@@ -204,6 +204,7 @@ static __always_inline __pure bool _static_cpu_has(u16 bit)
 
 #define static_cpu_has_bug(bit)		static_cpu_has((bit))
 #define boot_cpu_has_bug(bit)		cpu_has_bug(&boot_cpu_data, (bit))
+#define boot_cpu_set_bug(bit)		set_cpu_cap(&boot_cpu_data, (bit))
 
 #define MAX_CPU_FEATURES		(NCAPINTS * 32)
 #define cpu_have_feature		boot_cpu_has

+ 2 - 0
arch/x86/include/asm/cpufeatures.h

@@ -314,4 +314,6 @@
 #define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
 #define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400	X86_BUG(13) /* CPU is among those affected by Erratum 400 */
+
 #endif /* _ASM_X86_CPUFEATURES_H */

+ 0 - 22
arch/x86/include/asm/idle.h

@@ -1,22 +0,0 @@
-#ifndef _ASM_X86_IDLE_H
-#define _ASM_X86_IDLE_H
-
-#define IDLE_START 1
-#define IDLE_END 2
-
-struct notifier_block;
-void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
-
-#ifdef CONFIG_X86_64
-void enter_idle(void);
-void exit_idle(void);
-#else /* !CONFIG_X86_64 */
-static inline void enter_idle(void) { }
-static inline void exit_idle(void) { }
-static inline void __exit_idle(void) { }
-#endif /* CONFIG_X86_64 */
-
-void amd_e400_remove_cpu(int cpu);
-
-#endif /* _ASM_X86_IDLE_H */

+ 0 - 11
arch/x86/include/asm/percpu.h

@@ -507,17 +507,6 @@ do {									\
 
 #endif
 
-/* This is not atomic against other CPUs -- CPU preemption needs to be off */
-#define x86_test_and_clear_bit_percpu(bit, var)				\
-({									\
-	bool old__;							\
-	asm volatile("btr %2,"__percpu_arg(1)"\n\t"			\
-		     CC_SET(c)						\
-		     : CC_OUT(c) (old__), "+m" (var)			\
-		     : "dIr" (bit));					\
-	old__;								\
-})
-
 static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
                         const unsigned long __percpu *addr)
 {

+ 1 - 2
arch/x86/include/asm/processor.h

@@ -633,10 +633,9 @@ static inline void sync_core(void)
 }
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
-extern void init_amd_e400_c1e_mask(void);
+extern void amd_e400_c1e_apic_setup(void);
 
 extern unsigned long		boot_option_idle_override;
-extern bool			amd_e400_c1e_detected;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT,
 			 IDLE_POLL};

+ 2 - 1
arch/x86/kernel/apic/apic.c

@@ -48,7 +48,6 @@
 #include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
-#include <asm/idle.h>
 #include <asm/mtrr.h>
 #include <asm/time.h>
 #include <asm/smp.h>
@@ -894,11 +893,13 @@ void __init setup_boot_APIC_clock(void)
 
 	/* Setup the lapic or request the broadcast */
 	setup_APIC_timer();
+	amd_e400_c1e_apic_setup();
 }
 
 void setup_secondary_APIC_clock(void)
 {
 	setup_APIC_timer();
+	amd_e400_c1e_apic_setup();
 }
 
 /*

+ 0 - 1
arch/x86/kernel/apic/io_apic.c

@@ -48,7 +48,6 @@
 #include <linux/bootmem.h>
 
 #include <asm/irqdomain.h>
-#include <asm/idle.h>
 #include <asm/io.h>
 #include <asm/smp.h>
 #include <asm/cpu.h>

+ 13 - 7
arch/x86/kernel/cpu/amd.c

@@ -20,6 +20,10 @@
 
 #include "cpu.h"
 
+static const int amd_erratum_383[];
+static const int amd_erratum_400[];
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
+
 /*
  * nodes_per_socket: Stores the number of nodes per socket.
  * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -592,11 +596,16 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 	/* F16h erratum 793, CVE-2013-6885 */
 	if (c->x86 == 0x16 && c->x86_model <= 0xf)
 		msr_set_bit(MSR_AMD64_LS_CFG, 15);
-}
 
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
+	/*
+	 * Check whether the machine is affected by erratum 400. This is
+	 * used to select the proper idle routine and to enable the check
+	 * whether the machine is affected in arch_post_acpi_subsys_init(),
+	 * which sets the X86_BUG_AMD_APIC_C1E bug depending on the MSR check.
+	 */
+	if (cpu_has_amd_erratum(c, amd_erratum_400))
+		set_cpu_bug(c, X86_BUG_AMD_E400);
+}
 
 static void init_amd_k8(struct cpuinfo_x86 *c)
 {
@@ -777,9 +786,6 @@ static void init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 > 0x11)
 		set_cpu_cap(c, X86_FEATURE_ARAT);
 
-	if (cpu_has_amd_erratum(c, amd_erratum_400))
-		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
-
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
 
 	/* 3DNow or LM implies PREFETCHW */

+ 0 - 1
arch/x86/kernel/cpu/common.c

@@ -1172,7 +1172,6 @@ void enable_sep_cpu(void)
 void __init identify_boot_cpu(void)
 {
 	identify_cpu(&boot_cpu_data);
-	init_amd_e400_c1e_mask();
 #ifdef CONFIG_X86_32
 	sysenter_setup();
 	enable_sep_cpu();

+ 0 - 1
arch/x86/kernel/cpu/mcheck/mce_amd.c

@@ -24,7 +24,6 @@
 
 #include <asm/amd_nb.h>
 #include <asm/apic.h>
-#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/trace/irq_vectors.h>

+ 0 - 1
arch/x86/kernel/cpu/mcheck/therm_throt.c

@@ -26,7 +26,6 @@
 
 #include <asm/processor.h>
 #include <asm/apic.h>
-#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/trace/irq_vectors.h>

+ 0 - 1
arch/x86/kernel/cpu/mcheck/threshold.c

@@ -6,7 +6,6 @@
 
 #include <asm/irq_vectors.h>
 #include <asm/apic.h>
-#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
 

+ 0 - 1
arch/x86/kernel/cpu/mshyperv.c

@@ -25,7 +25,6 @@
 #include <asm/hyperv.h>
 #include <asm/mshyperv.h>
 #include <asm/desc.h>
-#include <asm/idle.h>
 #include <asm/irq_regs.h>
 #include <asm/i8259.h>
 #include <asm/apic.h>

+ 0 - 1
arch/x86/kernel/irq.c

@@ -14,7 +14,6 @@
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/irq.h>
-#include <asm/idle.h>
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
 #include <asm/desc.h>

+ 0 - 1
arch/x86/kernel/irq_64.c

@@ -16,7 +16,6 @@
 #include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <asm/io_apic.h>
-#include <asm/idle.h>
 #include <asm/apic.h>
 
 int sysctl_panic_on_stackoverflow;

+ 0 - 3
arch/x86/kernel/kvm.c

@@ -42,7 +42,6 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/tlbflush.h>
-#include <asm/idle.h>
 #include <asm/apic.h>
 #include <asm/apicdef.h>
 #include <asm/hypervisor.h>
@@ -267,13 +266,11 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
 	case KVM_PV_REASON_PAGE_NOT_PRESENT:
 		/* page is swapped out by the host. */
 		prev_state = exception_enter();
-		exit_idle();
 		kvm_async_pf_task_wait((u32)read_cr2());
 		exception_exit(prev_state);
 		break;
 	case KVM_PV_REASON_PAGE_READY:
 		rcu_irq_enter();
-		exit_idle();
 		kvm_async_pf_task_wake((u32)read_cr2());
 		rcu_irq_exit();
 		break;

+ 50 - 99
arch/x86/kernel/process.c

@@ -23,7 +23,6 @@
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
-#include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/mwait.h>
 #include <asm/fpu/internal.h>
@@ -65,23 +64,6 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 };
 EXPORT_PER_CPU_SYMBOL(cpu_tss);
 
-#ifdef CONFIG_X86_64
-static DEFINE_PER_CPU(unsigned char, is_idle);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
-	atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
-	atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-#endif
-
 /*
  * this gets called so that we can store lazy state into memory and copy the
  * current task into the new thread.
@@ -251,39 +233,9 @@ static inline void play_dead(void)
 }
 #endif
 
-#ifdef CONFIG_X86_64
-void enter_idle(void)
-{
-	this_cpu_write(is_idle, 1);
-	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
-}
-
-static void __exit_idle(void)
-{
-	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
-		return;
-	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
-}
-
-/* Called from interrupts to signify idle end */
-void exit_idle(void)
-{
-	/* idle loop has pid 0 */
-	if (current->pid)
-		return;
-	__exit_idle();
-}
-#endif
-
 void arch_cpu_idle_enter(void)
 {
 	local_touch_nmi();
-	enter_idle();
-}
-
-void arch_cpu_idle_exit(void)
-{
-	__exit_idle();
 }
 
 void arch_cpu_idle_dead(void)
@@ -336,59 +288,33 @@ void stop_this_cpu(void *dummy)
 		halt();
 }
 
-bool amd_e400_c1e_detected;
-EXPORT_SYMBOL(amd_e400_c1e_detected);
-
-static cpumask_var_t amd_e400_c1e_mask;
-
-void amd_e400_remove_cpu(int cpu)
-{
-	if (amd_e400_c1e_mask != NULL)
-		cpumask_clear_cpu(cpu, amd_e400_c1e_mask);
-}
-
 /*
- * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt
- * pending message MSR. If we detect C1E, then we handle it the same
- * way as C3 power states (local apic timer and TSC stop)
+ * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
+ * states (local apic timer and TSC stop).
  */
 static void amd_e400_idle(void)
 {
-	if (!amd_e400_c1e_detected) {
-		u32 lo, hi;
-
-		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
-
-		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
-			amd_e400_c1e_detected = true;
-			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
-				mark_tsc_unstable("TSC halt in AMD C1E");
-			pr_info("System has AMD C1E enabled\n");
-		}
+	/*
+	 * We cannot use static_cpu_has_bug() here because X86_BUG_AMD_APIC_C1E
+	 * gets set after static_cpu_has() places have been converted via
+	 * alternatives.
+	 */
+	if (!boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
+		default_idle();
+		return;
 	}
 
-	if (amd_e400_c1e_detected) {
-		int cpu = smp_processor_id();
+	tick_broadcast_enter();
 
-		if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
-			cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-			/* Force broadcast so ACPI can not interfere. */
-			tick_broadcast_force();
-			pr_info("Switch to broadcast mode on CPU%d\n", cpu);
-		}
-		tick_broadcast_enter();
-
-		default_idle();
+	default_idle();
 
-		/*
-		 * The switch back from broadcast mode needs to be
-		 * called with interrupts disabled.
-		 */
-		local_irq_disable();
-		tick_broadcast_exit();
-		local_irq_enable();
-	} else
-		default_idle();
+	/*
+	 * The switch back from broadcast mode needs to be called with
+	 * interrupts disabled.
+	 */
+	local_irq_disable();
+	tick_broadcast_exit();
+	local_irq_enable();
 }
 
 /*
@@ -448,8 +374,7 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
 	if (x86_idle || boot_option_idle_override == IDLE_POLL)
 		return;
 
-	if (cpu_has_bug(c, X86_BUG_AMD_APIC_C1E)) {
-		/* E400: APIC timer interrupt does not wake up CPU from C1e */
+	if (boot_cpu_has_bug(X86_BUG_AMD_E400)) {
 		pr_info("using AMD E400 aware idle routine\n");
 		x86_idle = amd_e400_idle;
 	} else if (prefer_mwait_c1_over_halt(c)) {
@@ -459,11 +384,37 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
 		x86_idle = default_idle;
 }
 
-void __init init_amd_e400_c1e_mask(void)
+void amd_e400_c1e_apic_setup(void)
+{
+	if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E)) {
+		pr_info("Switch to broadcast mode on CPU%d\n", smp_processor_id());
+		local_irq_disable();
+		tick_broadcast_force();
+		local_irq_enable();
+	}
+}
+
+void __init arch_post_acpi_subsys_init(void)
 {
-	/* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */
-	if (x86_idle == amd_e400_idle)
-		zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL);
+	u32 lo, hi;
+
+	if (!boot_cpu_has_bug(X86_BUG_AMD_E400))
+		return;
+
+	/*
+	 * AMD E400 detection needs to happen after ACPI has been enabled. If
+	 * the machine is affected, the K8_INTP_C1E_ACTIVE_MASK bits are set in
+	 * MSR_K8_INT_PENDING_MSG.
+	 */
+	rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+	if (!(lo & K8_INTP_C1E_ACTIVE_MASK))
+		return;
+
+	boot_cpu_set_bug(X86_BUG_AMD_APIC_C1E);
+
+	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+		mark_tsc_unstable("TSC halt in AMD C1E");
+	pr_info("System has AMD C1E enabled\n");
 }
 
 static int __init idle_setup(char *str)

+ 0 - 1
arch/x86/kernel/process_32.c

@@ -49,7 +49,6 @@
 
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
-#include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>

+ 0 - 1
arch/x86/kernel/process_64.c

@@ -44,7 +44,6 @@
 #include <asm/desc.h>
 #include <asm/proto.h>
 #include <asm/ia32.h>
-#include <asm/idle.h>
 #include <asm/syscalls.h>
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>

+ 0 - 2
arch/x86/kernel/smpboot.c

@@ -58,7 +58,6 @@
 #include <asm/desc.h>
 #include <asm/nmi.h>
 #include <asm/irq.h>
-#include <asm/idle.h>
 #include <asm/realmode.h>
 #include <asm/cpu.h>
 #include <asm/numa.h>
@@ -1596,7 +1595,6 @@ void play_dead_common(void)
 {
 	idle_task_exit();
 	reset_lazy_tlbstate();
-	amd_e400_remove_cpu(raw_smp_processor_id());
 
 	/* Ack it */
 	(void)cpu_report_death();

+ 0 - 1
arch/x86/platform/uv/tlb_uv.c

@@ -19,7 +19,6 @@
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
 #include <asm/apic.h>
-#include <asm/idle.h>
 #include <asm/tsc.h>
 #include <asm/irq_vectors.h>
 #include <asm/timer.h>

+ 1 - 1
drivers/acpi/processor_idle.c

@@ -141,7 +141,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr,
 	if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT))
 		return;
 
-	if (amd_e400_c1e_detected)
+	if (boot_cpu_has_bug(X86_BUG_AMD_APIC_C1E))
 		type = ACPI_STATE_C1;
 
 	/*

+ 0 - 2
drivers/dma/ioat/registers.h

@@ -106,8 +106,6 @@
 #define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
 #define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
 
-/* IOAT1 define left for i7300_idle driver to not fail compiling */
-#define IOAT1_CHANSTS_OFFSET		0x04
 #define IOAT_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
 #define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
 #define IOAT_CHANSTS_SOFT_ERR			0x10ULL

+ 0 - 17
drivers/idle/Kconfig

@@ -8,20 +8,3 @@ config INTEL_IDLE
 	  native Intel hardware idle features.  The acpi_idle driver
 	  can be configured at the same time, in order to handle
 	  processors intel_idle does not support.
-
-menu "Memory power savings"
-depends on X86_64
-
-config I7300_IDLE_IOAT_CHANNEL
-	bool
-
-config I7300_IDLE
-	tristate "Intel chipset idle memory power saving driver"
-	select I7300_IDLE_IOAT_CHANNEL
-	help
-	  Enable memory power savings when idle with certain Intel server
-	  chipsets. The chipset must have I/O AT support, such as the
-	  Intel 7300. The power savings depends on the type and quantity of
-	  DRAM devices.
-
-endmenu

+ 0 - 1
drivers/idle/Makefile

@@ -1,3 +1,2 @@
-obj-$(CONFIG_I7300_IDLE)			+= i7300_idle.o
 obj-$(CONFIG_INTEL_IDLE)			+= intel_idle.o
 

+ 0 - 612
drivers/idle/i7300_idle.c

@@ -1,612 +0,0 @@
-/*
- * (C) Copyright 2008 Intel Corporation
- * Authors:
- * Andy Henroid <andrew.d.henroid@intel.com>
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- */
-
-/*
- * Save DIMM power on Intel 7300-based platforms when all CPUs/cores
- * are idle, using the DIMM thermal throttling capability.
- *
- * This driver depends on the Intel integrated DMA controller (I/O AT).
- * If the driver for I/O AT (drivers/dma/ioatdma*) is also enabled,
- * this driver should work cooperatively.
- */
-
-/* #define DEBUG */
-
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/sched.h>
-#include <linux/notifier.h>
-#include <linux/cpumask.h>
-#include <linux/ktime.h>
-#include <linux/delay.h>
-#include <linux/debugfs.h>
-#include <linux/stop_machine.h>
-#include <linux/i7300_idle.h>
-
-#include <asm/idle.h>
-
-#include "../dma/ioat/hw.h"
-#include "../dma/ioat/registers.h"
-
-#define I7300_IDLE_DRIVER_VERSION	"1.55"
-#define I7300_PRINT			"i7300_idle:"
-
-#define MAX_STOP_RETRIES	10
-
-static int debug;
-module_param_named(debug, debug, uint, 0644);
-MODULE_PARM_DESC(debug, "Enable debug printks in this driver");
-
-static int forceload;
-module_param_named(forceload, forceload, uint, 0644);
-MODULE_PARM_DESC(debug, "Enable driver testing on unvalidated i5000");
-
-#define dprintk(fmt, arg...) \
-	do { if (debug) printk(KERN_INFO I7300_PRINT fmt, ##arg); } while (0)
-
-/*
- * Value to set THRTLOW to when initiating throttling
- *  0 = No throttling
- *  1 = Throttle when > 4 activations per eval window (Maximum throttling)
- *  2 = Throttle when > 8 activations
- *  168 = Throttle when > 672 activations (Minimum throttling)
- */
-#define MAX_THROTTLE_LOW_LIMIT		168
-static uint throttle_low_limit = 1;
-module_param_named(throttle_low_limit, throttle_low_limit, uint, 0644);
-MODULE_PARM_DESC(throttle_low_limit,
-		"Value for THRTLOWLM activation field "
-		"(0 = disable throttle, 1 = Max throttle, 168 = Min throttle)");
-
-/*
- * simple invocation and duration statistics
- */
-static unsigned long total_starts;
-static unsigned long total_us;
-
-#ifdef DEBUG
-static unsigned long past_skip;
-#endif
-
-static struct pci_dev *fbd_dev;
-
-static raw_spinlock_t i7300_idle_lock;
-static int i7300_idle_active;
-
-static u8 i7300_idle_thrtctl_saved;
-static u8 i7300_idle_thrtlow_saved;
-static u32 i7300_idle_mc_saved;
-
-static cpumask_var_t idle_cpumask;
-static ktime_t start_ktime;
-static unsigned long avg_idle_us;
-
-static struct dentry *debugfs_dir;
-
-/* Begin: I/O AT Helper routines */
-
-#define IOAT_CHANBASE(ioat_ctl, chan) (ioat_ctl + 0x80 + 0x80 * chan)
-/* Snoop control (disable snoops when coherency is not important) */
-#define IOAT_DESC_SADDR_SNP_CTL (1UL << 1)
-#define IOAT_DESC_DADDR_SNP_CTL (1UL << 2)
-
-static struct pci_dev *ioat_dev;
-static struct ioat_dma_descriptor *ioat_desc; /* I/O AT desc & data (1 page) */
-static unsigned long ioat_desc_phys;
-static u8 *ioat_iomap; /* I/O AT memory-mapped control regs (aka CB_BAR) */
-static u8 *ioat_chanbase;
-
-/* Start I/O AT memory copy */
-static int i7300_idle_ioat_start(void)
-{
-	u32 err;
-	/* Clear error (due to circular descriptor pointer) */
-	err = readl(ioat_chanbase + IOAT_CHANERR_OFFSET);
-	if (err)
-		writel(err, ioat_chanbase + IOAT_CHANERR_OFFSET);
-
-	writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-	return 0;
-}
-
-/* Stop I/O AT memory copy */
-static void i7300_idle_ioat_stop(void)
-{
-	int i;
-	u64 sts;
-
-	for (i = 0; i < MAX_STOP_RETRIES; i++) {
-		writeb(IOAT_CHANCMD_RESET,
-			ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-
-		udelay(10);
-
-		sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-			IOAT_CHANSTS_STATUS;
-
-		if (sts != IOAT_CHANSTS_ACTIVE)
-			break;
-
-	}
-
-	if (i == MAX_STOP_RETRIES) {
-		dprintk("failed to stop I/O AT after %d retries\n",
-			MAX_STOP_RETRIES);
-	}
-}
-
-/* Test I/O AT by copying 1024 byte from 2k to 1k */
-static int __init i7300_idle_ioat_selftest(u8 *ctl,
-		struct ioat_dma_descriptor *desc, unsigned long desc_phys)
-{
-	u64 chan_sts;
-
-	memset(desc, 0, 2048);
-	memset((u8 *) desc + 2048, 0xab, 1024);
-
-	desc[0].size = 1024;
-	desc[0].ctl = 0;
-	desc[0].src_addr = desc_phys + 2048;
-	desc[0].dst_addr = desc_phys + 1024;
-	desc[0].next = 0;
-
-	writeb(IOAT_CHANCMD_RESET, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-	writeb(IOAT_CHANCMD_START, ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-
-	udelay(1000);
-
-	chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-			IOAT_CHANSTS_STATUS;
-
-	if (chan_sts != IOAT_CHANSTS_DONE) {
-		/* Not complete, reset the channel */
-		writeb(IOAT_CHANCMD_RESET,
-		       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-		return -1;
-	}
-
-	if (*(u32 *) ((u8 *) desc + 3068) != 0xabababab ||
-	    *(u32 *) ((u8 *) desc + 2044) != 0xabababab) {
-		dprintk("Data values src 0x%x, dest 0x%x, memset 0x%x\n",
-			*(u32 *) ((u8 *) desc + 2048),
-			*(u32 *) ((u8 *) desc + 1024),
-			*(u32 *) ((u8 *) desc + 3072));
-		return -1;
-	}
-	return 0;
-}
-
-static struct device dummy_dma_dev = {
-	.init_name = "fallback device",
-	.coherent_dma_mask = DMA_BIT_MASK(64),
-	.dma_mask = &dummy_dma_dev.coherent_dma_mask,
-};
-
-/* Setup and initialize I/O AT */
-/* This driver needs I/O AT as the throttling takes effect only when there is
- * some memory activity. We use I/O AT to set up a dummy copy, while all CPUs
- * go idle and memory is throttled.
- */
-static int __init i7300_idle_ioat_init(void)
-{
-	u8 ver, chan_count, ioat_chan;
-	u16 chan_ctl;
-
-	ioat_iomap = (u8 *) ioremap_nocache(pci_resource_start(ioat_dev, 0),
-					    pci_resource_len(ioat_dev, 0));
-
-	if (!ioat_iomap) {
-		printk(KERN_ERR I7300_PRINT "failed to map I/O AT registers\n");
-		goto err_ret;
-	}
-
-	ver = readb(ioat_iomap + IOAT_VER_OFFSET);
-	if (ver != IOAT_VER_1_2) {
-		printk(KERN_ERR I7300_PRINT "unknown I/O AT version (%u.%u)\n",
-			ver >> 4, ver & 0xf);
-		goto err_unmap;
-	}
-
-	chan_count = readb(ioat_iomap + IOAT_CHANCNT_OFFSET);
-	if (!chan_count) {
-		printk(KERN_ERR I7300_PRINT "unexpected # of I/O AT channels "
-			"(%u)\n",
-			chan_count);
-		goto err_unmap;
-	}
-
-	ioat_chan = chan_count - 1;
-	ioat_chanbase = IOAT_CHANBASE(ioat_iomap, ioat_chan);
-
-	chan_ctl = readw(ioat_chanbase + IOAT_CHANCTRL_OFFSET);
-	if (chan_ctl & IOAT_CHANCTRL_CHANNEL_IN_USE) {
-		printk(KERN_ERR I7300_PRINT "channel %d in use\n", ioat_chan);
-		goto err_unmap;
-	}
-
-	writew(IOAT_CHANCTRL_CHANNEL_IN_USE,
-		ioat_chanbase + IOAT_CHANCTRL_OFFSET);
-
-	ioat_desc = (struct ioat_dma_descriptor *)dma_alloc_coherent(
-			&dummy_dma_dev, 4096,
-			(dma_addr_t *)&ioat_desc_phys, GFP_KERNEL);
-	if (!ioat_desc) {
-		printk(KERN_ERR I7300_PRINT "failed to allocate I/O AT desc\n");
-		goto err_mark_unused;
-	}
-
-	writel(ioat_desc_phys & 0xffffffffUL,
-	       ioat_chanbase + IOAT1_CHAINADDR_OFFSET_LOW);
-	writel(ioat_desc_phys >> 32,
-	       ioat_chanbase + IOAT1_CHAINADDR_OFFSET_HIGH);
-
-	if (i7300_idle_ioat_selftest(ioat_iomap, ioat_desc, ioat_desc_phys)) {
-		printk(KERN_ERR I7300_PRINT "I/O AT self-test failed\n");
-		goto err_free;
-	}
-
-	/* Setup circular I/O AT descriptor chain */
-	ioat_desc[0].ctl = IOAT_DESC_SADDR_SNP_CTL | IOAT_DESC_DADDR_SNP_CTL;
-	ioat_desc[0].src_addr = ioat_desc_phys + 2048;
-	ioat_desc[0].dst_addr = ioat_desc_phys + 3072;
-	ioat_desc[0].size = 128;
-	ioat_desc[0].next = ioat_desc_phys + sizeof(struct ioat_dma_descriptor);
-
-	ioat_desc[1].ctl = ioat_desc[0].ctl;
-	ioat_desc[1].src_addr = ioat_desc[0].src_addr;
-	ioat_desc[1].dst_addr = ioat_desc[0].dst_addr;
-	ioat_desc[1].size = ioat_desc[0].size;
-	ioat_desc[1].next = ioat_desc_phys;
-
-	return 0;
-
-err_free:
-	dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0);
-err_mark_unused:
-	writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
-err_unmap:
-	iounmap(ioat_iomap);
-err_ret:
-	return -ENODEV;
-}
-
-/* Cleanup I/O AT */
-static void __exit i7300_idle_ioat_exit(void)
-{
-	int i;
-	u64 chan_sts;
-
-	i7300_idle_ioat_stop();
-
-	/* Wait for a while for the channel to halt before releasing */
-	for (i = 0; i < MAX_STOP_RETRIES; i++) {
-		writeb(IOAT_CHANCMD_RESET,
-		       ioat_chanbase + IOAT1_CHANCMD_OFFSET);
-
-		chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-			IOAT_CHANSTS_STATUS;
-
-		if (chan_sts != IOAT_CHANSTS_ACTIVE) {
-			writew(0, ioat_chanbase + IOAT_CHANCTRL_OFFSET);
-			break;
-		}
-		udelay(1000);
-	}
-
-	chan_sts = readq(ioat_chanbase + IOAT1_CHANSTS_OFFSET) &
-			IOAT_CHANSTS_STATUS;
-
-	/*
-	 * We tried to reset multiple times. If IO A/T channel is still active
-	 * flag an error and return without cleanup. Memory leak is better
-	 * than random corruption in that extreme error situation.
-	 */
-	if (chan_sts == IOAT_CHANSTS_ACTIVE) {
-		printk(KERN_ERR I7300_PRINT "Unable to stop IO A/T channels."
-			" Not freeing resources\n");
-		return;
-	}
-
-	dma_free_coherent(&dummy_dma_dev, 4096, (void *)ioat_desc, 0);
-	iounmap(ioat_iomap);
-}
-
-/* End: I/O AT Helper routines */
-
-#define DIMM_THRTLOW 0x64
-#define DIMM_THRTCTL 0x67
-#define DIMM_THRTCTL_THRMHUNT (1UL << 0)
-#define DIMM_MC 0x40
-#define DIMM_GTW_MODE (1UL << 17)
-#define DIMM_GBLACT 0x60
-
-/*
- * Keep track of an exponential-decaying average of recent idle durations.
- * The latest duration gets DURATION_WEIGHT_PCT percentage weight
- * in this average, with the old average getting the remaining weight.
- *
- * High weights emphasize recent history, low weights include long history.
- */
-#define DURATION_WEIGHT_PCT 55
-
-/*
- * When the decaying average of recent durations or the predicted duration
- * of the next timer interrupt is shorter than duration_threshold, the
- * driver will decline to throttle.
- */
-#define DURATION_THRESHOLD_US 100
-
-
-/* Store DIMM thermal throttle configuration */
-static int i7300_idle_thrt_save(void)
-{
-	u32 new_mc_val;
-	u8 gblactlm;
-
-	pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &i7300_idle_thrtctl_saved);
-	pci_read_config_byte(fbd_dev, DIMM_THRTLOW, &i7300_idle_thrtlow_saved);
-	pci_read_config_dword(fbd_dev, DIMM_MC, &i7300_idle_mc_saved);
-	/*
-	 * Make sure we have Global Throttling Window Mode set to have a
-	 * "short" window. This (mostly) works around an issue where
-	 * throttling persists until the end of the global throttling window
-	 * size. On the tested system, this was resulting in a maximum of
-	 * 64 ms to exit throttling (average 32 ms). The actual numbers
-	 * depends on system frequencies. Setting the short window reduces
-	 * this by a factor of 4096.
-	 *
-	 * We will only do this only if the system is set for
-	 * unlimited-activations while in open-loop throttling (i.e., when
-	 * Global Activation Throttle Limit is zero).
-	 */
-	pci_read_config_byte(fbd_dev, DIMM_GBLACT, &gblactlm);
-	dprintk("thrtctl_saved = 0x%02x, thrtlow_saved = 0x%02x\n",
-		i7300_idle_thrtctl_saved,
-		i7300_idle_thrtlow_saved);
-	dprintk("mc_saved = 0x%08x, gblactlm = 0x%02x\n",
-		i7300_idle_mc_saved,
-		gblactlm);
-	if (gblactlm == 0) {
-		new_mc_val = i7300_idle_mc_saved | DIMM_GTW_MODE;
-		pci_write_config_dword(fbd_dev, DIMM_MC, new_mc_val);
-		return 0;
-	} else {
-		dprintk("could not set GTW_MODE = 1 (OLTT enabled)\n");
-		return -ENODEV;
-	}
-}
-
-/* Restore DIMM thermal throttle configuration */
-static void i7300_idle_thrt_restore(void)
-{
-	pci_write_config_dword(fbd_dev, DIMM_MC, i7300_idle_mc_saved);
-	pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved);
-	pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved);
-}
-
-/* Enable DIMM thermal throttling */
-static void i7300_idle_start(void)
-{
-	u8 new_ctl;
-	u8 limit;
-
-	new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT;
-	pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
-
-	limit = throttle_low_limit;
-	if (unlikely(limit > MAX_THROTTLE_LOW_LIMIT))
-		limit = MAX_THROTTLE_LOW_LIMIT;
-
-	pci_write_config_byte(fbd_dev, DIMM_THRTLOW, limit);
-
-	new_ctl = i7300_idle_thrtctl_saved | DIMM_THRTCTL_THRMHUNT;
-	pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
-}
-
-/* Disable DIMM thermal throttling */
-static void i7300_idle_stop(void)
-{
-	u8 new_ctl;
-	u8 got_ctl;
-
-	new_ctl = i7300_idle_thrtctl_saved & ~DIMM_THRTCTL_THRMHUNT;
-	pci_write_config_byte(fbd_dev, DIMM_THRTCTL, new_ctl);
-
-	pci_write_config_byte(fbd_dev, DIMM_THRTLOW, i7300_idle_thrtlow_saved);
-	pci_write_config_byte(fbd_dev, DIMM_THRTCTL, i7300_idle_thrtctl_saved);
-	pci_read_config_byte(fbd_dev, DIMM_THRTCTL, &got_ctl);
-	WARN_ON_ONCE(got_ctl != i7300_idle_thrtctl_saved);
-}
-
-
-/*
- * i7300_avg_duration_check()
- * return 0 if the decaying average of recent idle durations is
- * more than DURATION_THRESHOLD_US
- */
-static int i7300_avg_duration_check(void)
-{
-	if (avg_idle_us >= DURATION_THRESHOLD_US)
-		return 0;
-
-#ifdef DEBUG
-	past_skip++;
-#endif
-	return 1;
-}
-
-/* Idle notifier to look at idle CPUs */
-static int i7300_idle_notifier(struct notifier_block *nb, unsigned long val,
-				void *data)
-{
-	unsigned long flags;
-	ktime_t now_ktime;
-	static ktime_t idle_begin_time;
-	static int time_init = 1;
-
-	if (!throttle_low_limit)
-		return 0;
-
-	if (unlikely(time_init)) {
-		time_init = 0;
-		idle_begin_time = ktime_get();
-	}
-
-	raw_spin_lock_irqsave(&i7300_idle_lock, flags);
-	if (val == IDLE_START) {
-
-		cpumask_set_cpu(smp_processor_id(), idle_cpumask);
-
-		if (cpumask_weight(idle_cpumask) != num_online_cpus())
-			goto end;
-
-		now_ktime = ktime_get();
-		idle_begin_time = now_ktime;
-
-		if (i7300_avg_duration_check())
-			goto end;
-
-		i7300_idle_active = 1;
-		total_starts++;
-		start_ktime = now_ktime;
-
-		i7300_idle_start();
-		i7300_idle_ioat_start();
-
-	} else if (val == IDLE_END) {
-		cpumask_clear_cpu(smp_processor_id(), idle_cpumask);
-		if (cpumask_weight(idle_cpumask) == (num_online_cpus() - 1)) {
-			/* First CPU coming out of idle */
-			u64 idle_duration_us;
-
-			now_ktime = ktime_get();
-
-			idle_duration_us = ktime_to_us(ktime_sub
-						(now_ktime, idle_begin_time));
-
-			avg_idle_us =
-				((100 - DURATION_WEIGHT_PCT) * avg_idle_us +
-				 DURATION_WEIGHT_PCT * idle_duration_us) / 100;
-
-			if (i7300_idle_active) {
-				ktime_t idle_ktime;
-
-				idle_ktime = ktime_sub(now_ktime, start_ktime);
-				total_us += ktime_to_us(idle_ktime);
-
-				i7300_idle_ioat_stop();
-				i7300_idle_stop();
-				i7300_idle_active = 0;
-			}
-		}
-	}
-end:
-	raw_spin_unlock_irqrestore(&i7300_idle_lock, flags);
-	return 0;
-}
-
-static struct notifier_block i7300_idle_nb = {
-	.notifier_call = i7300_idle_notifier,
-};
-
-MODULE_DEVICE_TABLE(pci, pci_tbl);
-
-static ssize_t stats_read_ul(struct file *fp, char __user *ubuf, size_t count,
-				loff_t *off)
-{
-	unsigned long *p = fp->private_data;
-	char buf[32];
-	int len;
-
-	len = snprintf(buf, 32, "%lu\n", *p);
-	return simple_read_from_buffer(ubuf, count, off, buf, len);
-}
-
-static const struct file_operations idle_fops = {
-	.open	= simple_open,
-	.read	= stats_read_ul,
-	.llseek = default_llseek,
-};
-
-struct debugfs_file_info {
-	void *ptr;
-	char name[32];
-	struct dentry *file;
-} debugfs_file_list[] = {
-				{&total_starts, "total_starts", NULL},
-				{&total_us, "total_us", NULL},
-#ifdef DEBUG
-				{&past_skip, "past_skip", NULL},
-#endif
-				{NULL, "", NULL}
-			};
-
-static int __init i7300_idle_init(void)
-{
-	raw_spin_lock_init(&i7300_idle_lock);
-	total_us = 0;
-
-	if (i7300_idle_platform_probe(&fbd_dev, &ioat_dev, forceload))
-		return -ENODEV;
-
-	if (i7300_idle_thrt_save())
-		return -ENODEV;
-
-	if (i7300_idle_ioat_init())
-		return -ENODEV;
-
-	if (!zalloc_cpumask_var(&idle_cpumask, GFP_KERNEL))
-		return -ENOMEM;
-
-	debugfs_dir = debugfs_create_dir("i7300_idle", NULL);
-	if (debugfs_dir) {
-		int i = 0;
-
-		while (debugfs_file_list[i].ptr != NULL) {
-			debugfs_file_list[i].file = debugfs_create_file(
-					debugfs_file_list[i].name,
-					S_IRUSR,
-					debugfs_dir,
-					debugfs_file_list[i].ptr,
-					&idle_fops);
-			i++;
-		}
-	}
-
-	idle_notifier_register(&i7300_idle_nb);
-
-	printk(KERN_INFO "i7300_idle: loaded v%s\n", I7300_IDLE_DRIVER_VERSION);
-	return 0;
-}
-
-static void __exit i7300_idle_exit(void)
-{
-	idle_notifier_unregister(&i7300_idle_nb);
-	free_cpumask_var(idle_cpumask);
-
-	if (debugfs_dir) {
-		int i = 0;
-
-		while (debugfs_file_list[i].file != NULL) {
-			debugfs_remove(debugfs_file_list[i].file);
-			i++;
-		}
-
-		debugfs_remove(debugfs_dir);
-	}
-	i7300_idle_thrt_restore();
-	i7300_idle_ioat_exit();
-}
-
-module_init(i7300_idle_init);
-module_exit(i7300_idle_exit);
-
-MODULE_AUTHOR("Andy Henroid <andrew.d.henroid@intel.com>");
-MODULE_DESCRIPTION("Intel Chipset DIMM Idle Power Saving Driver v"
-			I7300_IDLE_DRIVER_VERSION);
-MODULE_LICENSE("GPL");

+ 0 - 1
drivers/thermal/intel_powerclamp.c

@@ -56,7 +56,6 @@
 #include <asm/msr.h>
 #include <asm/mwait.h>
 #include <asm/cpu_device_id.h>
-#include <asm/idle.h>
 #include <asm/hardirq.h>
 
 #define MAX_TARGET_RATIO (50U)

+ 0 - 2
drivers/xen/events/events_base.c

@@ -37,7 +37,6 @@
 #include <asm/desc.h>
 #include <asm/ptrace.h>
 #include <asm/irq.h>
-#include <asm/idle.h>
 #include <asm/io_apic.h>
 #include <asm/i8259.h>
 #include <asm/xen/pci.h>
@@ -1256,7 +1255,6 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 	irq_enter();
 #ifdef CONFIG_X86
-	exit_idle();
 	inc_irq_stat(irq_hv_callback_count);
 #endif
 

+ 3 - 0
init/main.c

@@ -448,6 +448,8 @@ void __init parse_early_param(void)
 	done = 1;
 }
 
+void __init __weak arch_post_acpi_subsys_init(void) { }
+
 void __init __weak smp_setup_processor_id(void)
 {
 }
@@ -649,6 +651,7 @@ asmlinkage __visible void __init start_kernel(void)
 	check_bugs();
 
 	acpi_subsystem_init();
+	arch_post_acpi_subsys_init();
 	sfi_init_late();
 
 	if (efi_enabled(EFI_RUNTIME_SERVICES)) {