
Merge branch 'x86/signal' into core/signal

Conflicts:
	arch/x86/kernel/cpu/feature_names.c
	arch/x86/kernel/setup.c
	drivers/pci/intel-iommu.c
	include/asm-x86/cpufeature.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar 17 years ago
Parent
Commit
101d5b7137
100 changed files with 3648 additions and 1380 deletions
  1. 6 0
      Documentation/kernel-parameters.txt
  2. 9 0
      arch/x86/Kconfig
  3. 70 0
      arch/x86/Kconfig.cpu
  4. 3 2
      arch/x86/boot/compressed/head_32.S
  5. 7 5
      arch/x86/boot/compressed/misc.c
  6. 1 1
      arch/x86/boot/mkcpustr.c
  7. 0 0
      arch/x86/es7000/Makefile
  8. 9 9
      arch/x86/es7000/es7000.h
  9. 1 1
      arch/x86/es7000/es7000plat.c
  10. 7 4
      arch/x86/ia32/ia32_aout.c
  11. 46 61
      arch/x86/ia32/ia32_signal.c
  12. 3 1
      arch/x86/kernel/Makefile
  13. 5 4
      arch/x86/kernel/acpi/boot.c
  14. 37 4
      arch/x86/kernel/apic_32.c
  15. 226 11
      arch/x86/kernel/apic_64.c
  16. 0 1
      arch/x86/kernel/apm_32.c
  17. 1 1
      arch/x86/kernel/asm-offsets_64.c
  18. 5 5
      arch/x86/kernel/bios_uv.c
  19. 21 11
      arch/x86/kernel/cpu/Makefile
  20. 72 0
      arch/x86/kernel/cpu/cmpxchg.c
  21. 41 1
      arch/x86/kernel/cpu/common.c
  22. 2 0
      arch/x86/kernel/cpu/common_64.c
  23. 8 8
      arch/x86/kernel/cpu/cyrix.c
  24. 0 84
      arch/x86/kernel/cpu/feature_names.c
  25. 0 71
      arch/x86/kernel/cpu/intel.c
  26. 147 22
      arch/x86/kernel/cpu/intel_cacheinfo.c
  27. 32 0
      arch/x86/kernel/cpu/mkcapflags.pl
  28. 1 1
      arch/x86/kernel/cpu/mtrr/main.c
  29. 20 0
      arch/x86/kernel/cpu/powerflags.c
  30. 0 1
      arch/x86/kernel/cpuid.c
  31. 3 4
      arch/x86/kernel/crash_dump_64.c
  32. 32 56
      arch/x86/kernel/genapic_64.c
  33. 61 1
      arch/x86/kernel/genapic_flat_64.c
  34. 164 0
      arch/x86/kernel/genx2apic_cluster.c
  35. 159 0
      arch/x86/kernel/genx2apic_phys.c
  36. 67 2
      arch/x86/kernel/genx2apic_uv_x.c
  37. 135 19
      arch/x86/kernel/i387.c
  38. 24 0
      arch/x86/kernel/i8259.c
  39. 4 6
      arch/x86/kernel/io_apic_32.c
  40. 572 36
      arch/x86/kernel/io_apic_64.c
  41. 1 0
      arch/x86/kernel/ioport.c
  42. 2 1
      arch/x86/kernel/ipi.c
  43. 1 1
      arch/x86/kernel/irq_32.c
  44. 1 1
      arch/x86/kernel/irq_64.c
  45. 1 0
      arch/x86/kernel/ldt.c
  46. 2 0
      arch/x86/kernel/mpparse.c
  47. 7 0
      arch/x86/kernel/numaq_32.c
  48. 0 2
      arch/x86/kernel/paravirt.c
  49. 1 1
      arch/x86/kernel/paravirt_patch_32.c
  50. 1 1
      arch/x86/kernel/pci-dma.c
  51. 2 0
      arch/x86/kernel/process_32.c
  52. 67 65
      arch/x86/kernel/process_64.c
  53. 7 29
      arch/x86/kernel/ptrace.c
  54. 2 0
      arch/x86/kernel/setup.c
  55. 17 2
      arch/x86/kernel/sigframe.h
  56. 109 82
      arch/x86/kernel/signal_32.c
  57. 114 194
      arch/x86/kernel/signal_64.c
  58. 22 25
      arch/x86/kernel/smpboot.c
  59. 1 1
      arch/x86/kernel/summit_32.c
  60. 2 0
      arch/x86/kernel/sys_i386_32.c
  61. 23 21
      arch/x86/kernel/sys_x86_64.c
  62. 2 2
      arch/x86/kernel/syscall_64.c
  63. 1 0
      arch/x86/kernel/time_32.c
  64. 1 0
      arch/x86/kernel/tls.c
  65. 0 1
      arch/x86/kernel/traps_32.c
  66. 35 32
      arch/x86/kernel/traps_64.c
  67. 1 15
      arch/x86/kernel/visws_quirks.c
  68. 1 0
      arch/x86/kernel/vm86_32.c
  69. 2 2
      arch/x86/kernel/vmi_32.c
  70. 316 0
      arch/x86/kernel/xsave.c
  71. 34 4
      arch/x86/lguest/boot.c
  72. 36 42
      arch/x86/lib/msr-on-cpu.c
  73. 21 21
      arch/x86/lib/string_32.c
  74. 3 3
      arch/x86/lib/strstr_32.c
  75. 7 0
      arch/x86/lib/usercopy_32.c
  76. 3 1
      arch/x86/mach-default/setup.c
  77. 1 1
      arch/x86/mach-generic/Makefile
  78. 4 5
      arch/x86/mach-generic/bigsmp.c
  79. 6 7
      arch/x86/mach-generic/es7000.c
  80. 6 6
      arch/x86/mach-generic/numaq.c
  81. 5 6
      arch/x86/mach-generic/summit.c
  82. 1 1
      arch/x86/mm/discontig_32.c
  83. 2 2
      arch/x86/mm/dump_pagetables.c
  84. 1 2
      arch/x86/mm/fault.c
  85. 1 0
      arch/x86/mm/init_32.c
  86. 4 4
      arch/x86/mm/init_64.c
  87. 2 2
      arch/x86/mm/ioremap.c
  88. 5 5
      arch/x86/mm/numa_64.c
  89. 1 1
      arch/x86/mm/pageattr.c
  90. 1 1
      arch/x86/pci/amd_bus.c
  91. 38 29
      arch/x86/pci/irq.c
  92. 7 0
      arch/x86/power/cpu_32.c
  93. 7 0
      arch/x86/power/cpu_64.c
  94. 7 7
      arch/x86/power/hibernate_asm_32.S
  95. 41 4
      arch/x86/xen/enlighten.c
  96. 2 0
      drivers/pci/Makefile
  97. 157 0
      drivers/pci/dma_remapping.h
  98. 378 19
      drivers/pci/dmar.c
  99. 26 161
      drivers/pci/intel-iommu.c
  100. 98 135
      drivers/pci/intel-iommu.h

+ 6 - 0
Documentation/kernel-parameters.txt

@@ -1425,6 +1425,12 @@ and is between 256 and 4096 characters. It is defined in the file
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
 
+	nox2apic	[X86-64,APIC] Do not enable x2APIC mode.
+
+	x2apic_phys	[X86-64,APIC] Use x2apic physical mode instead of
+			default x2apic cluster mode on platforms
+			supporting x2apic.
+
 	noltlbs		[PPC] Do not use large page/tlb entries for kernel
 			lowmem mapping on PPC40x.
 

+ 9 - 0
arch/x86/Kconfig

@@ -29,6 +29,7 @@ config X86
 	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
+	select HAVE_ARCH_TRACEHOOK
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 
@@ -1643,6 +1644,14 @@ config DMAR_FLOPPY_WA
 	 workaround will setup a 1:1 mapping for the first
 	 16M to make floppy (an ISA device) work.
 
+config INTR_REMAP
+	bool "Support for Interrupt Remapping (EXPERIMENTAL)"
+	depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+	help
+	 Supports Interrupt remapping for IO-APIC and MSI devices.
+	 To use x2apic mode in the CPU's which support x2APIC enhancements or
+	 to support platforms with CPU's having > 8 bit APIC ID, say Y.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"

+ 70 - 0
arch/x86/Kconfig.cpu

@@ -418,3 +418,73 @@ config X86_MINIMUM_CPU_FAMILY
 config X86_DEBUGCTLMSR
 	def_bool y
 	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
+
+menuconfig PROCESSOR_SELECT
+	default y
+	bool "Supported processor vendors" if EMBEDDED
+	help
+	  This lets you choose what x86 vendor support code your kernel
+	  will include.
+
+config CPU_SUP_INTEL_32
+	default y
+	bool "Support Intel processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Intel processors
+
+config CPU_SUP_INTEL_64
+	default y
+	bool "Support Intel processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for Intel processors
+
+config CPU_SUP_CYRIX_32
+	default y
+	bool "Support Cyrix processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Cyrix processors
+
+config CPU_SUP_AMD_32
+	default y
+	bool "Support AMD processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for AMD processors
+
+config CPU_SUP_AMD_64
+	default y
+	bool "Support AMD processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for AMD processors
+
+config CPU_SUP_CENTAUR_32
+	default y
+	bool "Support Centaur processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Centaur processors
+
+config CPU_SUP_CENTAUR_64
+	default y
+	bool "Support Centaur processors" if PROCESSOR_SELECT
+	depends on 64BIT
+	help
+	  This enables extended support for Centaur processors
+
+config CPU_SUP_TRANSMETA_32
+	default y
+	bool "Support Transmeta processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for Transmeta processors
+
+config CPU_SUP_UMC_32
+	default y
+	bool "Support UMC processors" if PROCESSOR_SELECT
+	depends on !64BIT
+	help
+	  This enables extended support for UMC processors

+ 3 - 2
arch/x86/boot/compressed/head_32.S

@@ -137,14 +137,15 @@ relocated:
  */
 	movl output_len(%ebx), %eax
 	pushl %eax
+			# push arguments for decompress_kernel:
 	pushl %ebp	# output address
 	movl input_len(%ebx), %eax
 	pushl %eax	# input_len
 	leal input_data(%ebx), %eax
 	pushl %eax	# input_data
 	leal boot_heap(%ebx), %eax
-	pushl %eax	# heap area as third argument
-	pushl %esi	# real mode pointer as second arg
+	pushl %eax	# heap area
+	pushl %esi	# real mode pointer
 	call decompress_kernel
 	addl $20, %esp
 	popl %ecx
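
A hedged reading of the call sequence above: the arguments are pushed cdecl-style (last argument first), so the C function being called looks roughly like the sketch below. Parameter names are taken from the asm comments; the real prototype lives in arch/x86/boot/compressed/misc.c and uses its own memptr typedef for the heap.

/* Sketch inferred from the pushes above -- not copied from misc.c.
 * %esi (real mode pointer) is pushed last, so it is argument 1. */
asmlinkage void decompress_kernel(void *rmode,			/* %esi */
				  unsigned long heap,		/* boot_heap */
				  unsigned char *input_data,
				  unsigned long input_len,
				  unsigned char *output);	/* %ebp */
/* output_len is pushed before these five arguments and popped
 * back into %ecx after the "addl $20, %esp" cleanup. */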

+ 7 - 5
arch/x86/boot/compressed/misc.c

@@ -16,7 +16,7 @@
  */
 #undef CONFIG_PARAVIRT
 #ifdef CONFIG_X86_32
-#define _ASM_DESC_H_ 1
+#define ASM_X86__DESC_H 1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -27,7 +27,7 @@
 #include <linux/linkage.h>
 #include <linux/screen_info.h>
 #include <linux/elf.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 #include <asm/boot.h>
 #include <asm/bootparam.h>
@@ -251,7 +251,7 @@ static void __putstr(int error, const char *s)
 				y--;
 			}
 		} else {
-			vidmem [(x + cols * y) * 2] = c;
+			vidmem[(x + cols * y) * 2] = c;
 			if (++x >= cols) {
 				x = 0;
 				if (++y >= lines) {
@@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n)
 	int i;
 	char *ss = s;
 
-	for (i = 0; i < n; i++) ss[i] = c;
+	for (i = 0; i < n; i++)
+		ss[i] = c;
 	return s;
 }
 
@@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n)
 	const char *s = src;
 	char *d = dest;
 
-	for (i = 0; i < n; i++) d[i] = s[i];
+	for (i = 0; i < n; i++)
+		d[i] = s[i];
 	return dest;
 }
 

+ 1 - 1
arch/x86/boot/mkcpustr.c

@@ -15,7 +15,7 @@
 
 #include <stdio.h>
 
-#include "../kernel/cpu/feature_names.c"
+#include "../kernel/cpu/capflags.c"
 
 #if NCAPFLAGS > 8
 # error "Need to adjust the boot code handling of CPUID strings"

+ 0 - 0
arch/x86/mach-es7000/Makefile → arch/x86/es7000/Makefile


+ 9 - 9
arch/x86/mach-es7000/es7000.h → arch/x86/es7000/es7000.h

@@ -1,7 +1,7 @@
 /*
  * Written by: Garry Forsgren, Unisys Corporation
  *             Natalie Protasevich, Unisys Corporation
- * This file contains the code to configure and interface 
+ * This file contains the code to configure and interface
  * with Unisys ES7000 series hardware system manager.
  *
  * Copyright (c) 2003 Unisys Corporation.  All Rights Reserved.
@@ -18,7 +18,7 @@
  * with this program; if not, write the Free Software Foundation, Inc., 59
  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
  *
- * Contact information: Unisys Corporation, Township Line & Union Meeting 
+ * Contact information: Unisys Corporation, Township Line & Union Meeting
  * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
 *
 * http://www.unisys.com
@@ -41,7 +41,7 @@
 #define	MIP_VALID		0x0100000000000000ULL
 #define	MIP_PORT(VALUE)	((VALUE >> 32) & 0xffff)
 
-#define	MIP_RD_LO(VALUE)	(VALUE & 0xffffffff)   
+#define	MIP_RD_LO(VALUE)	(VALUE & 0xffffffff)
 
 struct mip_reg_info {
 	unsigned long long mip_info;
@@ -51,11 +51,11 @@ struct mip_reg_info {
 };
 
 struct part_info {
-	unsigned char type;   
+	unsigned char type;
 	unsigned char length;
 	unsigned char part_id;
 	unsigned char apic_mode;
-	unsigned long snum;    
+	unsigned long snum;
 	char ptype[16];
 	char sname[64];
 	char pname[64];
@@ -68,11 +68,11 @@ struct psai {
 };
 
 struct es7000_mem_info {
-	unsigned char type;   
+	unsigned char type;
 	unsigned char length;
 	unsigned char resv[6];
-	unsigned long long  start; 
-	unsigned long long  size; 
+	unsigned long long  start;
+	unsigned long long  size;
 };
 
 struct es7000_oem_table {
@@ -106,7 +106,7 @@ struct mip_reg {
 };
 
 #define	MIP_SW_APIC		0x1020b
-#define	MIP_FUNC(VALUE) 	(VALUE & 0xff)
+#define	MIP_FUNC(VALUE)		(VALUE & 0xff)
 
 extern int parse_unisys_oem (char *oemptr);
 extern void setup_unisys(void);

+ 1 - 1
arch/x86/mach-es7000/es7000plat.c → arch/x86/es7000/es7000plat.c

@@ -72,7 +72,7 @@ es7000_rename_gsi(int ioapic, int gsi)
 			base += nr_ioapic_registers[i];
 	}
 
-	if (!ioapic && (gsi < 16)) 
+	if (!ioapic && (gsi < 16))
 		gsi += base;
 	return gsi;
 }

+ 7 - 4
arch/x86/ia32/ia32_aout.c

@@ -85,8 +85,10 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump)
 	dump->regs.ax = regs->ax;
 	dump->regs.ds = current->thread.ds;
 	dump->regs.es = current->thread.es;
-	asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
-	asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
+	savesegment(fs, fs);
+	dump->regs.fs = fs;
+	savesegment(gs, gs);
+	dump->regs.gs = gs;
 	dump->regs.orig_ax = regs->orig_ax;
 	dump->regs.ip = regs->ip;
 	dump->regs.cs = regs->cs;
@@ -430,8 +432,9 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	current->mm->start_stack =
 		(unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
 	/* start thread */
-	asm volatile("movl %0,%%fs" :: "r" (0)); \
-	asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
+	loadsegment(fs, 0);
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 	load_gs_index(0);
 	(regs)->ip = ex.a_entry;
 	(regs)->sp = current->mm->start_stack;
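
The savesegment()/loadsegment() helpers adopted above wrap the raw segment-register moves that the old inline asm open-coded. A minimal sketch of what such macros expand to, modeled directly on the asm being removed (the kernel's real definitions live in the x86 system headers, and the real loadsegment() additionally recovers from faulting selector loads):

/* Sketch, not the kernel's exact definition: read a segment register
 * into a variable, mirroring the removed "movl %%fs,%0" asm. */
#define savesegment(seg, value) \
	asm("mov %%" #seg ",%0" : "=r" (value) : : "memory")

/* Sketch: load a selector into a segment register, mirroring the
 * removed "movl %0,%%ds" asm (no fault handling in this version). */
#define loadsegment(seg, value) \
	asm volatile("mov %0,%%" #seg : : "rm" ((unsigned short)(value)))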

+ 46 - 61
arch/x86/ia32/ia32_signal.c

@@ -179,9 +179,10 @@ struct sigframe
 	u32 pretcode;
 	int sig;
 	struct sigcontext_ia32 sc;
-	struct _fpstate_ia32 fpstate;
+	struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
 	unsigned int extramask[_COMPAT_NSIG_WORDS-1];
 	char retcode[8];
+	/* fp state follows here */
 };
 
 struct rt_sigframe
@@ -192,8 +193,8 @@ struct rt_sigframe
 	u32 puc;
 	compat_siginfo_t info;
 	struct ucontext_ia32 uc;
-	struct _fpstate_ia32 fpstate;
 	char retcode[8];
+	/* fp state follows here */
 };
 
 #define COPY(x)		{ 		\
@@ -206,7 +207,7 @@ struct rt_sigframe
 	{ unsigned int cur;						\
 	  unsigned short pre;						\
 	  err |= __get_user(pre, &sc->seg);				\
-	  asm volatile("movl %%" #seg ",%0" : "=r" (cur));		\
+	  savesegment(seg, cur);					\
 	  pre |= mask;							\
 	  if (pre != cur) loadsegment(seg, pre); }
 
@@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 				   unsigned int *peax)
 {
 	unsigned int tmpflags, gs, oldgs, err = 0;
-	struct _fpstate_ia32 __user *buf;
+	void __user *buf;
 	u32 tmp;
 
 	/* Always make any pending restarted system calls return -EINTR */
@@ -235,7 +236,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 	 */
 	err |= __get_user(gs, &sc->gs);
 	gs |= 3;
-	asm("movl %%gs,%0" : "=r" (oldgs));
+	savesegment(gs, oldgs);
 	if (gs != oldgs)
 		load_gs_index(gs);
 
@@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
 	err |= __get_user(tmp, &sc->fpstate);
 	buf = compat_ptr(tmp);
-	if (buf) {
-		if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-			goto badframe;
-		err |= restore_i387_ia32(buf);
-	} else {
-		struct task_struct *me = current;
-
-		if (used_math()) {
-			clear_fpu(me);
-			clear_used_math();
-		}
-	}
+	err |= restore_i387_xstate_ia32(buf);
 
 	err |= __get_user(tmp, &sc->ax);
 	*peax = tmp;
 
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@@ -350,19 +337,18 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
  */
 
 static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
-				 struct _fpstate_ia32 __user *fpstate,
+				 void __user *fpstate,
 				 struct pt_regs *regs, unsigned int mask)
 {
 	int tmp, err = 0;
 
-	tmp = 0;
-	__asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(gs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->gs);
-	__asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(fs, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->fs);
-	__asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(ds, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->ds);
-	__asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp));
+	savesegment(es, tmp);
 	err |= __put_user(tmp, (unsigned int __user *)&sc->es);
 
 	err |= __put_user((u32)regs->di, &sc->di);
@@ -381,7 +367,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
 	err |= __put_user((u32)regs->flags, &sc->flags);
 	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
 
-	tmp = save_i387_ia32(fpstate);
+	tmp = save_i387_xstate_ia32(fpstate);
 	if (tmp < 0)
 		err = -EFAULT;
 	else {
@@ -402,7 +388,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
  * Determine which stack to use..
 */
 static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
-				 size_t frame_size)
+				 size_t frame_size,
+				 void **fpstate)
 {
 	unsigned long sp;
 
@@ -421,6 +408,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
 		 ka->sa.sa_restorer)
 		sp = (unsigned long) ka->sa.sa_restorer;
 
+	if (used_math()) {
+		sp = sp - sig_xstate_ia32_size;
+		*fpstate = (struct _fpstate_ia32 *) sp;
+	}
+
 	sp -= frame_size;
 	/* Align the stack pointer according to the i386 ABI,
 	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
@@ -434,6 +426,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
+	void __user *fpstate = NULL;
 
 	/* copy_to_user optimizes that into a single 8 byte store */
 	static const struct {
@@ -448,25 +441,21 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 		0,
 	};
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
-	err |= __put_user(sig, &frame->sig);
-	if (err)
-		goto give_sigsegv;
+	if (__put_user(sig, &frame->sig))
+		return -EFAULT;
 
-	err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs,
-					set->sig[0]);
-	if (err)
-		goto give_sigsegv;
+	if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+		return -EFAULT;
 
 	if (_COMPAT_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-		if (err)
-			goto give_sigsegv;
+		if (__copy_to_user(frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			return -EFAULT;
 	}
 
 	if (ka->sa.sa_flags & SA_RESTORER) {
@@ -487,7 +476,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	 */
 	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
@@ -498,8 +487,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->dx = 0;
 	regs->cx = 0;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
@@ -510,10 +499,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 #endif
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
@@ -522,6 +507,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
+	void __user *fpstate = NULL;
 
 	/* __copy_to_user optimizes that into a single 8 byte store */
 	static const struct {
@@ -537,30 +523,33 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		0,
 	};
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
 	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
 	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
 	err |= copy_siginfo_to_user32(&frame->info, info);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				     regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	if (ka->sa.sa_flags & SA_RESTORER)
 		restorer = ka->sa.sa_restorer;
@@ -575,7 +564,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	 */
 	err |= __copy_to_user(frame->retcode, &code, 8);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long) frame;
@@ -591,8 +580,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->dx = (unsigned long) &frame->info;
 	regs->cx = (unsigned long) &frame->uc;
 
-	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS));
-	asm volatile("movl %0,%%es" :: "r" (__USER32_DS));
+	loadsegment(ds, __USER32_DS);
+	loadsegment(es, __USER32_DS);
 
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
@@ -603,8 +592,4 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 #endif
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }

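The layout change above moves the FP state out of the fixed struct: get_sigframe() now reserves sig_xstate_ia32_size bytes below the stack top first (xsave areas are variable-sized, so they can no longer be a struct member), then places the fixed-size frame below that. Roughly, as a sketch of the resulting stack picture:

/* Sketch of the new 32-bit signal stack layout produced above
 * (addresses grow downward; sizes are illustrative):
 *
 *   -----------------  <- user sp at signal delivery
 *   xsave/fpstate area (sig_xstate_ia32_size bytes, variable)
 *   -----------------  <- *fpstate, recorded in sc->fpstate
 *   struct sigframe / rt_sigframe (fixed size)
 *   -----------------  <- frame, aligned so ((sp + 4) & 15) == 0
 */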
+ 3 - 1
arch/x86/kernel/Makefile

@@ -38,7 +38,7 @@ obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
-obj-y				+= i387.o
+obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
 obj-$(CONFIG_X86_32)		+= tls.o
@@ -104,6 +104,8 @@ obj-$(CONFIG_OLPC)		+= olpc.o
 ifeq ($(CONFIG_X86_64),y)
         obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
 	obj-y				+= bios_uv.o
+        obj-y				+= genx2apic_cluster.o
+        obj-y				+= genx2apic_phys.o
         obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
         obj-$(CONFIG_AUDIT)		+= audit_64.o
 

+ 5 - 4
arch/x86/kernel/acpi/boot.c

@@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled);
 #ifdef	CONFIG_X86_64
 
 #include <asm/proto.h>
-#include <asm/genapic.h>
 
 #else				/* X86 */
 
@@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif
 
-static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
-
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 struct acpi_mcfg_allocation *pci_mmcfg_config;
 int pci_mmcfg_config_num;
 
+static int acpi_mcfg_64bit_base_addr __initdata = FALSE;
+
 static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg)
 {
 	if (!strcmp(mcfg->header.oem_id, "SGI"))
@@ -775,7 +774,7 @@ static void __init acpi_register_lapic_address(unsigned long address)
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
 	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid  = read_apic_id();
 #ifdef CONFIG_X86_32
 		apic_version[boot_cpu_physical_apicid] =
 			 GET_APIC_VERSION(apic_read(APIC_LVR));
@@ -1351,7 +1350,9 @@ static void __init acpi_process_madt(void)
 				acpi_ioapic = 1;
 
 				smp_found_config = 1;
+#ifdef CONFIG_X86_32
 				setup_apic_routing();
+#endif
 			}
 		}
 		if (error == -EINVAL) {

+ 37 - 4
arch/x86/kernel/apic_32.c

@@ -145,13 +145,18 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+/*
+ * Paravirt kernels also might be using these below ops. So we still
+ * use generic apic_read()/apic_write(), which might be pointing to different
+ * ops in PARAVIRT case.
+ */
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -167,6 +172,34 @@ u32 safe_apic_wait_icr_idle(void)
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -1205,7 +1238,7 @@ void __init init_apic_mappings(void)
 	 * default configuration (or the MP table is broken).
 	 */
 	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 
 }
 
@@ -1242,7 +1275,7 @@ int __init APIC_init_uniprocessor(void)
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
 #ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
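The apic_ops table introduced above turns the low-level APIC accessors into an indirection point, so the MMIO-based xAPIC backend and (on 64-bit, below) the MSR-based x2APIC backend can be swapped at runtime. A hedged sketch of how generic wrappers dispatch through it; the kernel's real wrappers are static inlines in the apic headers:

/* Sketch only: generic accessors dispatching through the ops table. */
static inline u32 apic_read(u32 reg)
{
	return apic_ops->read(reg);
}

static inline void apic_write(u32 reg, u32 val)
{
	apic_ops->write(reg, val);
}

static inline void apic_icr_write(u32 low, u32 id)
{
	apic_ops->icr_write(low, id);
}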
+ 226 - 11
arch/x86/kernel/apic_64.c

@@ -27,6 +27,7 @@
 #include <linux/clockchips.h>
 #include <linux/acpi_pmtmr.h>
 #include <linux/module.h>
+#include <linux/dmar.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -39,6 +40,7 @@
 #include <asm/proto.h>
 #include <asm/timex.h>
 #include <asm/apic.h>
+#include <asm/i8259.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -46,6 +48,11 @@
 static int disable_apic_timer __cpuinitdata;
 static int apic_calibrate_pmtmr __initdata;
 int disable_apic;
+int disable_x2apic;
+int x2apic;
+
+/* x2apic enabled before OS handover */
+int x2apic_preenabled;
 
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
@@ -118,13 +125,13 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -140,6 +147,69 @@ u32 safe_apic_wait_icr_idle(void)
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write(APIC_ICR2, id << 24);
+	apic_write(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return (icr1 | ((u64)icr2 << 32));
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+
+EXPORT_SYMBOL_GPL(apic_ops);
+
+static void x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return;
+}
+
+static u32 safe_x2apic_wait_icr_idle(void)
+{
+	/* no need to wait for icr idle in x2apic */
+	return 0;
+}
+
+void x2apic_icr_write(u32 low, u32 id)
+{
+	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
+}
+
+u64 x2apic_icr_read(void)
+{
+	unsigned long val;
+
+	rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
+	return val;
+}
+
+static struct apic_ops x2apic_ops = {
+	.read = native_apic_msr_read,
+	.write = native_apic_msr_write,
+	.icr_read = x2apic_icr_read,
+	.icr_write = x2apic_icr_write,
+	.wait_icr_idle = x2apic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
+};
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
@@ -629,10 +699,10 @@ int __init verify_local_APIC(void)
 	/*
 	 * The ID register is read/write in a real APIC.
 	 */
-	reg0 = read_apic_id();
+	reg0 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
 	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
-	reg1 = read_apic_id();
+	reg1 = apic_read(APIC_ID);
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
 	apic_write(APIC_ID, reg0);
 	if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -833,6 +903,125 @@ void __cpuinit end_local_APIC_setup(void)
 	apic_pm_activate();
 }
 
+void check_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+
+	if (msr & X2APIC_ENABLE) {
+		printk("x2apic enabled by BIOS, switching to x2apic ops\n");
+		x2apic_preenabled = x2apic = 1;
+		apic_ops = &x2apic_ops;
+	}
+}
+
+void enable_x2apic(void)
+{
+	int msr, msr2;
+
+	rdmsr(MSR_IA32_APICBASE, msr, msr2);
+	if (!(msr & X2APIC_ENABLE)) {
+		printk("Enabling x2apic\n");
+		wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
+	}
+}
+
+void enable_IR_x2apic(void)
+{
+#ifdef CONFIG_INTR_REMAP
+	int ret;
+	unsigned long flags;
+
+	if (!cpu_has_x2apic)
+		return;
+
+	if (!x2apic_preenabled && disable_x2apic) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of nox2apic\n");
+		return;
+	}
+
+	if (x2apic_preenabled && disable_x2apic)
+		panic("Bios already enabled x2apic, can't enforce nox2apic");
+
+	if (!x2apic_preenabled && skip_ioapic_setup) {
+		printk(KERN_INFO
+		       "Skipped enabling x2apic and Interrupt-remapping "
+		       "because of skipping io-apic setup\n");
+		return;
+	}
+
+	ret = dmar_table_init();
+	if (ret) {
+		printk(KERN_INFO
+		       "dmar_table_init() failed with %d:\n", ret);
+
+		if (x2apic_preenabled)
+			panic("x2apic enabled by bios. But IR enabling failed");
+		else
+			printk(KERN_INFO
+			       "Not enabling x2apic,Intr-remapping\n");
+		return;
+	}
+
+	local_irq_save(flags);
+	mask_8259A();
+	save_mask_IO_APIC_setup();
+
+	ret = enable_intr_remapping(1);
+
+	if (ret && x2apic_preenabled) {
+		local_irq_restore(flags);
+		panic("x2apic enabled by bios. But IR enabling failed");
+	}
+
+	if (ret)
+		goto end;
+
+	if (!x2apic) {
+		x2apic = 1;
+		apic_ops = &x2apic_ops;
+		enable_x2apic();
+	}
+end:
+	if (ret)
+		/*
+		 * IR enabling failed
+		 */
+		restore_IO_APIC_setup();
+	else
+		reinit_intr_remapped_IO_APIC(x2apic_preenabled);
+
+	unmask_8259A();
+	local_irq_restore(flags);
+
+	if (!ret) {
+		if (!x2apic_preenabled)
+			printk(KERN_INFO
+			       "Enabled x2apic and interrupt-remapping\n");
+		else
+			printk(KERN_INFO
+			       "Enabled Interrupt-remapping\n");
+	} else
+		printk(KERN_ERR
+		       "Failed to enable Interrupt-remapping and x2apic\n");
+#else
+	if (!cpu_has_x2apic)
+		return;
+
+	if (x2apic_preenabled)
+		panic("x2apic enabled prior OS handover,"
+		      " enable CONFIG_INTR_REMAP");
+
+	printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
+	       " and x2apic\n");
+#endif
+
+	return;
+}
+
 /*
  * Detect and enable local APICs on non-SMP boards.
  * Original code written by Keir Fraser.
@@ -872,7 +1061,7 @@ void __init early_init_lapic_mapping(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /**
@@ -880,6 +1069,11 @@ void __init early_init_lapic_mapping(void)
 */
 void __init init_apic_mappings(void)
 {
+	if (x2apic) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+
 	/*
 	 * If no local APIC can be found then set up a fake all
 	 * zeroes page to simulate the local APIC and another
@@ -899,7 +1093,7 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
@@ -918,6 +1112,9 @@ int __init APIC_init_uniprocessor(void)
 		return -1;
 	}
 
+	enable_IR_x2apic();
+	setup_apic_routing();
+
 	verify_local_APIC();
 
 	connect_bsp_APIC();
@@ -1093,6 +1290,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	cpu_set(cpu, cpu_present_map);
 }
 
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+
 /*
 * Power management
 */
@@ -1129,7 +1331,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
 	maxlvt = lapic_get_maxlvt();
 
-	apic_pm_state.apic_id = read_apic_id();
+	apic_pm_state.apic_id = apic_read(APIC_ID);
 	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
 	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
 	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1164,10 +1366,14 @@ static int lapic_resume(struct sys_device *dev)
 	maxlvt = lapic_get_maxlvt();
 
 	local_irq_save(flags);
-	rdmsr(MSR_IA32_APICBASE, l, h);
-	l &= ~MSR_IA32_APICBASE_BASE;
-	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
-	wrmsr(MSR_IA32_APICBASE, l, h);
+	if (!x2apic) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_BASE;
+		l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	} else
+		enable_x2apic();
+
 	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
 	apic_write(APIC_ID, apic_pm_state.apic_id);
 	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1307,6 +1513,15 @@ __cpuinit int apic_is_clustered_box(void)
 	return (clusters > 2);
 }
 
+static __init int setup_nox2apic(char *str)
+{
+	disable_x2apic = 1;
+	clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+
 /*
 * APIC command line parameters
 */

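The x2apic_ops above swap the MMIO register window for MSR accesses: in x2APIC mode each APIC register at MMIO offset off is reachable as MSR 0x800 + (off >> 4), which is exactly the APIC_BASE_MSR + (APIC_ICR >> 4) arithmetic used in x2apic_icr_write(). A hedged sketch of the MSR-backed read/write pair named in the ops table (the real inlines live in the apic headers):

/* Sketch of MSR-based APIC accessors; APIC_BASE_MSR is 0x800,
 * the base of the x2APIC MSR range. */
static u32 native_apic_msr_read(u32 reg)
{
	u64 msr;

	rdmsrl(APIC_BASE_MSR + (reg >> 4), msr);
	return (u32)msr;
}

static void native_apic_msr_write(u32 reg, u32 v)
{
	wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
}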
+ 0 - 1
arch/x86/kernel/apm_32.c

@@ -228,7 +228,6 @@
 #include <linux/suspend.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>
-#include <linux/smp_lock.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>

+ 1 - 1
arch/x86/kernel/asm-offsets_64.c

@@ -22,7 +22,7 @@
 
 #define __NO_STUBS 1
 #undef __SYSCALL
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #define __SYSCALL(nr, sym) [nr] = 1,
 static char syscalls[] = {
 #include <asm/unistd.h>

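The __SYSCALL redefinition in this file is a counting trick: with each entry expanded to [nr] = 1, including unistd.h builds a char array whose sizeof() is the highest syscall number plus one (the header guard is undefined above so the list can be re-expanded). The idea in isolation, as a self-contained sketch with made-up entries:

/* Sketch: designated initializers make sizeof() report the highest
 * index + 1, regardless of how many entries appear in between. */
#define __SYSCALL(nr, sym) [nr] = 1,
static char syscalls[] = {
	__SYSCALL(0, read)
	__SYSCALL(1, write)
	__SYSCALL(59, execve)
};
/* sizeof(syscalls) == 60, i.e. highest syscall number + 1 */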
+ 5 - 5
arch/x86/kernel/bios_uv.c

@@ -25,11 +25,11 @@ x86_bios_strerror(long status)
 {
 	const char *str;
 	switch (status) {
-	case  0: str = "Call completed without error"; break;
-	case -1: str = "Not implemented"; break;
-	case -2: str = "Invalid argument"; break;
-	case -3: str = "Call completed with error"; break;
-	default: str = "Unknown BIOS status code"; break;
+	case  0: str = "Call completed without error";	break;
+	case -1: str = "Not implemented";		break;
+	case -2: str = "Invalid argument";		break;
+	case -3: str = "Call completed with error";	break;
+	default: str = "Unknown BIOS status code";	break;
 	}
 	return str;
 }

+ 21 - 11
arch/x86/kernel/cpu/Makefile

@@ -3,22 +3,32 @@
 #
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
-obj-y			+= proc.o feature_names.o
+obj-y			+= proc.o capflags.o powerflags.o
 
-obj-$(CONFIG_X86_32)	+= common.o bugs.o
+obj-$(CONFIG_X86_32)	+= common.o bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= common_64.o bugs_64.o
-obj-$(CONFIG_X86_32)	+= amd.o
-obj-$(CONFIG_X86_64)	+= amd_64.o
-obj-$(CONFIG_X86_32)	+= cyrix.o
-obj-$(CONFIG_X86_32)	+= centaur.o
-obj-$(CONFIG_X86_64)	+= centaur_64.o
-obj-$(CONFIG_X86_32)	+= transmeta.o
-obj-$(CONFIG_X86_32)	+= intel.o
-obj-$(CONFIG_X86_64)	+= intel_64.o
-obj-$(CONFIG_X86_32)	+= umc.o
+
+obj-$(CONFIG_CPU_SUP_AMD_32)		+= amd.o
+obj-$(CONFIG_CPU_SUP_AMD_64)		+= amd_64.o
+obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_32)	+= centaur.o
+obj-$(CONFIG_CPU_SUP_CENTAUR_64)	+= centaur_64.o
+obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
+obj-$(CONFIG_CPU_SUP_INTEL_32)		+= intel.o
+obj-$(CONFIG_CPU_SUP_INTEL_64)		+= intel_64.o
+obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
 obj-$(CONFIG_X86_MCE)	+= mcheck/
 obj-$(CONFIG_MTRR)	+= mtrr/
 obj-$(CONFIG_CPU_FREQ)	+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
+
+quiet_cmd_mkcapflags = MKCAP   $@
+      cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+
+cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
+
+targets += capflags.c
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+	$(call if_changed,mkcapflags)
+ 72 - 0
arch/x86/kernel/cpu/cmpxchg.c

@@ -0,0 +1,72 @@
+/*
+ * cmpxchg*() fallbacks for CPU not supporting these instructions
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/module.h>
+
+#ifndef CONFIG_X86_CMPXCHG
+unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
+{
+	u8 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u8 *)ptr;
+	if (prev == old)
+		*(u8 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u8);
+
+unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
+{
+	u16 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u16 *)ptr;
+	if (prev == old)
+		*(u16 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u16);
+
+unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
+{
+	u32 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u32 *)ptr;
+	if (prev == old)
+		*(u32 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_386_u32);
+#endif
+
+#ifndef CONFIG_X86_CMPXCHG64
+unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
+{
+	u64 prev;
+	unsigned long flags;
+
+	/* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
+	local_irq_save(flags);
+	prev = *(u64 *)ptr;
+	if (prev == old)
+		*(u64 *)ptr = new;
+	local_irq_restore(flags);
+	return prev;
+}
+EXPORT_SYMBOL(cmpxchg_486_u64);
+#endif
+

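These fallbacks implement compare-and-swap by disabling interrupts, which is sufficient on the UP-only 386/486 parts they target. As a quick illustration of the contract (a sketch, not kernel code): the function returns the value it observed before the swap attempt, so a caller retries until that value matches the one it read.

/* Illustrative caller (sketch): atomically increment a u32 counter
 * using the compare-and-swap contract of cmpxchg_386_u32(). */
static void counter_inc(volatile u32 *counter)
{
	u32 old, prev;

	do {
		old = *counter;
		/* Returns the value seen before the swap attempt;
		 * success means it equals the 'old' we passed in. */
		prev = cmpxchg_386_u32(counter, old, old + 1);
	} while (prev != old);
}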
+ 41 - 1
arch/x86/kernel/cpu/common.c

@@ -355,6 +355,35 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
 	clear_cpu_cap(c, X86_FEATURE_NOPL);
 }
 
+/*
+ * The NOPL instruction is supposed to exist on all CPUs with
+ * family >= 6, unfortunately, that's not true in practice because
+ * of early VIA chips and (more importantly) broken virtualizers that
+ * are not easy to detect.  Hence, probe for it based on first
+ * principles.
+ */
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
+{
+	const u32 nopl_signature = 0x888c53b1; /* Random number */
+	u32 has_nopl = nopl_signature;
+
+	clear_cpu_cap(c, X86_FEATURE_NOPL);
+	if (c->x86 >= 6) {
+		asm volatile("\n"
+			     "1:      .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
+			     "2:\n"
+			     "        .section .fixup,\"ax\"\n"
+			     "3:      xor %0,%0\n"
+			     "        jmp 2b\n"
+			     "        .previous\n"
+			     _ASM_EXTABLE(1b,3b)
+			     : "+a" (has_nopl));
+
+		if (has_nopl == nopl_signature)
+			set_cpu_cap(c, X86_FEATURE_NOPL);
+	}
+}
+
 static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
 {
 	u32 tfms, xlvl;
@@ -723,9 +752,20 @@ void __cpuinit cpu_init(void)
 	/*
 	 * Force FPU initialization:
 	 */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 	mxcsr_feature_mask_init();
+
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+
+	xsave_init();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU

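The NOPL probe added above relies on the kernel's exception-table machinery: the nopl opcode is executed speculatively, and if the CPU raises #UD the fault handler jumps to the fixup at label 3, which clears the signature register. A user-space analogue of the same probe idea, using SIGILL where the kernel uses its exception table (sketch, x86 only):

/* User-space sketch: execute NOPL and treat SIGILL as "unsupported". */
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void sigill(int sig)
{
	siglongjmp(env, 1);		/* unwind out of the faulting insn */
}

static int has_nopl(void)
{
	signal(SIGILL, sigill);
	if (sigsetjmp(env, 1))
		return 0;		/* trapped: no NOPL */
	asm volatile(".byte 0x0f,0x1f,0xc0");	/* nopl %eax */
	return 1;
}

int main(void)
{
	printf("nopl: %s\n", has_nopl() ? "yes" : "no");
	return 0;
}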
+ 2 - 0
arch/x86/kernel/cpu/common_64.c

@@ -636,6 +636,8 @@ void __cpuinit cpu_init(void)
 	barrier();
 
 	check_efer();
+	if (cpu != 0 && x2apic)
+		enable_x2apic();
 
 	/*
 	 * set up and load the per-CPU TSS

+ 8 - 8
arch/x86/kernel/cpu/cyrix.c

@@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
 
 	/* Load/Store Serialize to mem access disable (=reorder it) */
-	setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80);
+	setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
 	/* set load/store serialize from 1GB to 4GB */
 	ccr3 |= 0xe0;
 	setCx86(CX86_CCR3, ccr3);
@@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
 	printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
 
 	/* CCR2 bit 2: unlock NW bit */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
 	/* set 'Not Write-through' */
 	write_cr0(read_cr0() | X86_CR0_NW);
 	/* CCR2 bit 2: lock NW bit and set WT1 */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
 }
 
 /*
@@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void)
 	local_irq_save(flags);
 
 	/* Suspend on halt power saving and enable #SUSP pin */
-	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
+	setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
 
 	ccr3 = getCx86(CX86_CCR3);
 	setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);	/* enable MAPEN */
 
 
 	/* FPU fast, DTE cache, Mem bypass */
-	setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
+	setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
 	setCx86(CX86_CCR3, ccr3);			/* disable MAPEN */
 
 	set_cx86_memwb();
@@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		/* GXm supports extended cpuid levels 'ala' AMD */
 		if (c->cpuid_level == 2) {
 			/* Enable cxMMX extensions (GX1 Datasheet 54) */
-			setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
+			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
 
 			/*
 			 * GXm : 0x30 ... 0x5f GXm  datasheet 51
@@ -314,7 +314,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 		if (dir1 > 7) {
 			dir0_msn++;  /* M II */
 			/* Enable MMX extensions (App note 108) */
-			setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
+			setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
 		} else {
 			c->coma_bug = 1;      /* 6x86MX, it has the bug. */
 		}
@@ -429,7 +429,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
 			local_irq_save(flags);
 			ccr3 = getCx86(CX86_CCR3);
 			setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);       /* enable MAPEN  */
-			setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80);  /* enable cpuid  */
+			setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);  /* enable cpuid  */
			setCx86(CX86_CCR3, ccr3);                       /* disable MAPEN */
 			local_irq_restore(flags);
 		}

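For reference, the getCx86()/setCx86() accessors used throughout this file (and the _old variants it switches some call sites to) all talk to the Cyrix configuration registers through an index/data pair on I/O ports 0x22 and 0x23. A sketch of that access pattern, not the kernel's literal macros (those live in the Cyrix processor header):

/* Sketch of the port 0x22/0x23 index-data protocol behind the
 * getCx86()/setCx86() helpers. */
static inline unsigned char cx86_read(unsigned char reg)
{
	outb(reg, 0x22);	/* select the configuration register */
	return inb(0x23);	/* read its current value */
}

static inline void cx86_write(unsigned char reg, unsigned char data)
{
	outb(reg, 0x22);	/* select the configuration register */
	outb(data, 0x23);	/* write the new value */
}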
+ 0 - 84
arch/x86/kernel/cpu/feature_names.c

@@ -1,84 +0,0 @@
-/*
- * Strings for the various x86 capability flags.
- *
- * This file must not contain any executable code.
- */
-
-#include <asm/cpufeature.h>
-
-/*
- * These flag bits must match the definitions in <asm/cpufeature.h>.
- * NULL means this bit is undefined or reserved; either way it doesn't
- * have meaning as far as Linux is concerned.  Note that it's important
- * to realize there is a difference between this table and CPUID -- if
- * applications want to get the raw CPUID data, they should access
- * /dev/cpu/<cpu_nr>/cpuid instead.
- */
-const char * const x86_cap_flags[NCAPINTS*32] = {
-	/* Intel-defined */
-	"fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
-	"cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
-	"pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
-	"fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
-
-	/* AMD-defined */
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
-	NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
-	"3dnowext", "3dnow",
-
-	/* Transmeta-defined */
-	"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Other (Linux-defined) */
-	"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
-	NULL, NULL, NULL, NULL,
-	"constant_tsc", "up", NULL, "arch_perfmon",
-	"pebs", "bts", NULL, NULL,
-	"rep_good", NULL, NULL, NULL,
-	"nopl", NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Intel-defined (#2) */
-	"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-	"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
-	NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* VIA/Cyrix/Centaur-defined */
-	NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
-	"ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* AMD-defined (#2) */
-	"lahf_lm", "cmp_legacy", "svm", "extapic",
-	"cr8_legacy", "abm", "sse4a", "misalignsse",
-	"3dnowprefetch", "osvw", "ibs", "sse5",
-	"skinit", "wdt", NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-
-	/* Auxiliary (Linux-defined) */
-	"ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-};
-
-const char *const x86_power_flags[32] = {
-	"ts",	/* temperature sensor */
-	"fid",  /* frequency id control */
-	"vid",  /* voltage id control */
-	"ttp",  /* thermal trip */
-	"tm",
-	"stc",
-	"100mhzsteps",
-	"hwpstate",
-	"",	/* tsc invariant mapped to constant_tsc */
-		/* nothing */
-};

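Note: the capability strings are not gone, they are generated at build time now; x86_cap_flags[] is produced from <asm/cpufeature.h> by arch/x86/kernel/cpu/mkcapflags.pl (added below), and x86_power_flags[] moves verbatim into the new arch/x86/kernel/cpu/powerflags.c.
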
+ 0 - 71
arch/x86/kernel/cpu/intel.c

@@ -23,13 +23,6 @@
 #include <mach_apic.h>
 #endif
 
-#ifdef CONFIG_X86_INTEL_USERCOPY
-/*
- * Alignment at which movsl is preferred for bulk memory copies.
- */
-struct movsl_mask movsl_mask __read_mostly;
-#endif
-
 static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
 {
 	/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
@@ -314,69 +307,5 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 
 cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
 
-#ifndef CONFIG_X86_CMPXCHG
-unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
-{
-	u8 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u8 *)ptr;
-	if (prev == old)
-		*(u8 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u8);
-
-unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
-{
-	u16 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u16 *)ptr;
-	if (prev == old)
-		*(u16 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u16);
-
-unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
-{
-	u32 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg for 386. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u32 *)ptr;
-	if (prev == old)
-		*(u32 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u32);
-#endif
-
-#ifndef CONFIG_X86_CMPXCHG64
-unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
-{
-	u64 prev;
-	unsigned long flags;
-
-	/* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
-	local_irq_save(flags);
-	prev = *(u64 *)ptr;
-	if (prev == old)
-		*(u64 *)ptr = new;
-	local_irq_restore(flags);
-	return prev;
-}
-EXPORT_SYMBOL(cmpxchg_486_u64);
-#endif
-
 /* arch_initcall(intel_cpu_init); */
 

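Note on the deleted helpers: they emulate cmpxchg for CPUs that lack the instruction by doing the read-compare-write with local interrupts masked, which is atomic only against the same CPU (hence the "Unsuitable for SMP" comments). The code is not Intel-specific, so it presumably moves to a generic home rather than disappearing. For context, the usage pattern such helpers exist to back is the classic compare-and-swap retry loop, sketched here with a hypothetical counter:

	/* Sketch: CAS retry loop; 'counter' is a made-up variable. */
	static unsigned long counter;

	static void counter_add(unsigned long delta)
	{
		unsigned long old;

		do {
			old = counter;
		} while (cmpxchg(&counter, old, old + delta) != old);
	}
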
+ 147 - 22
arch/x86/kernel/cpu/intel_cacheinfo.c

@@ -1,8 +1,8 @@
 /*
- *      Routines to indentify caches on Intel CPU.
+ *	Routines to indentify caches on Intel CPU.
  *
- *      Changes:
- *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
+ *	Changes:
+ *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
  */
@@ -13,6 +13,7 @@
 #include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/pci.h>
 
 #include <asm/processor.h>
 #include <asm/smp.h>
@@ -130,9 +131,18 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
+	unsigned long can_disable;
 	cpumask_t shared_cpu_map;	/* future?: only cpus/node is needed */
 };
 
+#ifdef CONFIG_PCI
+static struct pci_device_id k8_nb_id[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+	{}
+};
+#endif
+
 unsigned short			num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
 static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
 static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
 
-static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
-		       union _cpuid4_leaf_ebx *ebx,
-		       union _cpuid4_leaf_ecx *ecx)
+static void __cpuinit
+amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+		     union _cpuid4_leaf_ebx *ebx,
+		     union _cpuid4_leaf_ecx *ecx)
 {
 	unsigned dummy;
 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
-static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
+{
+	if (index < 3)
+		return;
+	this_leaf->can_disable = 1;
+}
+
+static int
+__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 {
 	union _cpuid4_leaf_eax 	eax;
 	union _cpuid4_leaf_ebx 	ebx;
 	union _cpuid4_leaf_ecx 	ecx;
 	unsigned		edx;
 
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
 		amd_cpuid4(index, &eax, &ebx, &ecx);
-	else
-		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+		if (boot_cpu_data.x86 >= 0x10)
+			amd_check_l3_disable(index, this_leaf);
+	} else {
+		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
+	}
+
 	if (eax.split.type == CACHE_TYPE_NULL)
 		return -EIO; /* better error ? */
 
 	this_leaf->eax = eax;
 	this_leaf->ebx = ebx;
 	this_leaf->ecx = ecx;
-	this_leaf->size = (ecx.split.number_of_sets + 1) *
-		(ebx.split.coherency_line_size + 1) *
-		(ebx.split.physical_line_partition + 1) *
-		(ebx.split.ways_of_associativity + 1);
+	this_leaf->size = (ecx.split.number_of_sets          + 1) *
+			  (ebx.split.coherency_line_size     + 1) *
+			  (ebx.split.physical_line_partition + 1) *
+			  (ebx.split.ways_of_associativity   + 1);
 	return 0;
 }
 
@@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
 
 /* pointer to _cpuid4_info array (for each cache leaf) */
 static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
-#define CPUID4_INFO_IDX(x, y)    (&((per_cpu(cpuid4_info, x))[y]))
+#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(cpuid4_info, x))[y]))
 
 #ifdef CONFIG_SMP
 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
 
 	this_leaf = CPUID4_INFO_IDX(cpu, index);
 	for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
-		sibling_leaf = CPUID4_INFO_IDX(sibling, index);	
+		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
 		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
 	}
 }
@@ -572,7 +596,7 @@ struct _index_kobject {
 
 /* pointer to array of kobjects for cpuX/cache/indexY */
 static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
-#define INDEX_KOBJECT_PTR(x, y)    (&((per_cpu(index_kobject, x))[y]))
+#define INDEX_KOBJECT_PTR(x, y)		(&((per_cpu(index_kobject, x))[y]))
 
 #define show_one_plus(file_name, object, val)				\
 static ssize_t show_##file_name						\
@@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
 	}
 }
 
+#define to_object(k)	container_of(k, struct _index_kobject, kobj)
+#define to_attr(a)	container_of(a, struct _cache_attr, attr)
+
+#ifdef CONFIG_PCI
+static struct pci_dev *get_k8_northbridge(int node)
+{
+	struct pci_dev *dev = NULL;
+	int i;
+
+	for (i = 0; i <= node; i++) {
+		do {
+			dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+			if (!dev)
+				break;
+		} while (!pci_match_id(&k8_nb_id[0], dev));
+		if (!dev)
+			break;
+	}
+	return dev;
+}
+#else
+static struct pci_dev *get_k8_northbridge(int node)
+{
+	return NULL;
+}
+#endif
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+{
+	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	struct pci_dev *dev = NULL;
+	ssize_t ret = 0;
+	int i;
+
+	if (!this_leaf->can_disable)
+		return sprintf(buf, "Feature not enabled\n");
+
+	dev = get_k8_northbridge(node);
+	if (!dev) {
+		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 2; i++) {
+		unsigned int reg;
+
+		pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+
+		ret += sprintf(buf, "%sEntry: %d\n", buf, i);
+		ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",  
+			buf,
+			reg & 0x80000000 ? "Disabled" : "Allowed",
+			reg & 0x40000000 ? "Disabled" : "Allowed");
+		ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
+			buf, (reg & 0x30000) >> 16, reg & 0xfff);
+	}
+	return ret;
+}
+
+static ssize_t
+store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
+		    size_t count)
+{
+	int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
+	struct pci_dev *dev = NULL;
+	unsigned int ret, index, val;
+
+	if (!this_leaf->can_disable)
+		return 0;
+
+	if (strlen(buf) > 15)
+		return -EINVAL;
+
+	ret = sscanf(buf, "%x %x", &index, &val);
+	if (ret != 2)
+		return -EINVAL;
+	if (index > 1)
+		return -EINVAL;
+
+	val |= 0xc0000000;
+	dev = get_k8_northbridge(node);
+	if (!dev) {
+		printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
+		return -EINVAL;
+	}
+
+	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
+	wbinvd();
+	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+
+	return 1;
+}
+
 struct _cache_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
+static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+
 static struct attribute * default_attrs[] = {
 	&type.attr,
 	&level.attr,
@@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = {
 	&size.attr,
 	&shared_cpu_map.attr,
 	&shared_cpu_list.attr,
+	&cache_disable.attr,
 	NULL
 };
 
-#define to_object(k) container_of(k, struct _index_kobject, kobj)
-#define to_attr(a) container_of(a, struct _cache_attr, attr)
-
 static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
 {
 	struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
 	ret = fattr->show ?
 		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
 			buf) :
-	       	0;
+		0;
 	return ret;
 }
 
 static ssize_t store(struct kobject * kobj, struct attribute * attr,
 		     const char * buf, size_t count)
 {
-	return 0;
+	struct _cache_attr *fattr = to_attr(attr);
+	struct _index_kobject *this_leaf = to_object(kobj);
+	ssize_t ret;
+
+	ret = fattr->store ?
+		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
+			buf, count) :
+		0;
+	return ret;
 }
 
 static struct sysfs_ops sysfs_ops = {

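Note: on AMD family 0x10 and later, index 3 (the shared L3) now exposes a writable cache_disable attribute in each cpuX/cache/indexY directory. Reading it decodes the two disable slots from northbridge config registers 0x1BC and 0x1C0; writing expects "<slot> <index>" in hex, sets bits 30/31 (the 0xc0000000 OR'ed in above) and runs wbinvd() between the two config writes to flush dirty lines. A plausible invocation, assuming the usual sysfs mount point:

	# echo "0 7ff" > /sys/devices/system/cpu/cpu0/cache/index3/cache_disable
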
+ 32 - 0
arch/x86/kernel/cpu/mkcapflags.pl

@@ -0,0 +1,32 @@
+#!/usr/bin/perl
+#
+# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
+#
+
+($in, $out) = @ARGV;
+
+open(IN, "< $in\0")   or die "$0: cannot open: $in: $!\n";
+open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
+
+print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+
+while (defined($line = <IN>)) {
+	if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
+		$macro = $1;
+		$feature = $2;
+		$tail = $3;
+		if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
+			$feature = $1;
+		}
+
+		if ($feature ne '') {
+			printf OUT "\t%-32s = \"%s\",\n",
+				"[$macro]", "\L$feature";
+		}
+	}
+}
+print OUT "};\n";
+
+close(IN);
+close(OUT);

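For reference, the generated file is just a designated-initializer table: the regex takes the flag's user-visible name from a quoted string in the trailing comment when one exists (an empty "" suppresses the entry entirely, since the script skips empty names), otherwise the macro suffix is used, and either way the result is lowercased by the \L. Given input lines shaped like the hypothetical `#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */`, the output looks roughly like:

	#include <asm/cpufeature.h>

	const char * const x86_cap_flags[NCAPINTS*32] = {
		[X86_FEATURE_FPU]                = "fpu",
		[X86_FEATURE_VME]                = "vme",
		/* ... one entry per feature #define ... */
	};
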
+ 1 - 1
arch/x86/kernel/cpu/mtrr/main.c

@@ -729,7 +729,7 @@ struct var_mtrr_range_state {
 	mtrr_type type;
 };
 
-struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
 static int __initdata debug_print;
 
 static int __init

+ 20 - 0
arch/x86/kernel/cpu/powerflags.c

@@ -0,0 +1,20 @@
+/*
+ * Strings for the various x86 power flags
+ *
+ * This file must not contain any executable code.
+ */
+
+#include <asm/cpufeature.h>
+
+const char *const x86_power_flags[32] = {
+	"ts",	/* temperature sensor */
+	"fid",  /* frequency id control */
+	"vid",  /* voltage id control */
+	"ttp",  /* thermal trip */
+	"tm",
+	"stc",
+	"100mhzsteps",
+	"hwpstate",
+	"",	/* tsc invariant mapped to constant_tsc */
+		/* nothing */
+};

+ 0 - 1
arch/x86/kernel/cpuid.c

@@ -36,7 +36,6 @@
 #include <linux/smp_lock.h>
 #include <linux/major.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>

+ 3 - 4
arch/x86/kernel/crash_dump_64.c

@@ -7,9 +7,8 @@
 
 #include <linux/errno.h>
 #include <linux/crash_dump.h>
-
-#include <asm/uaccess.h>
-#include <asm/io.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
 /**
  * copy_oldmem_page - copy one page from "oldmem"
@@ -25,7 +24,7 @@
  * in the current kernel. We stitch up a pte, similar to kmap_atomic.
  */
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                               size_t csize, unsigned long offset, int userbuf)
+		size_t csize, unsigned long offset, int userbuf)
 {
 	void  *vaddr;
 

+ 32 - 56
arch/x86/kernel/genapic_64.c

@@ -16,87 +16,63 @@
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/hardirq.h>
+#include <linux/dmar.h>
 
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
 
-#ifdef CONFIG_ACPI
-#include <acpi/acpi_bus.h>
-#endif
-
-DEFINE_PER_CPU(int, x2apic_extra_bits);
+extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
+extern struct genapic apic_x2xpic_uv_x;
+extern struct genapic apic_x2apic_phys;
+extern struct genapic apic_x2apic_cluster;
 
 struct genapic __read_mostly *genapic = &apic_flat;
 
-static enum uv_system_type uv_system_type;
+static struct genapic *apic_probe[] __initdata = {
+	&apic_x2apic_uv_x,
+	&apic_x2apic_phys,
+	&apic_x2apic_cluster,
+	&apic_physflat,
+	NULL,
+};
 
 /*
  * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
  */
 void __init setup_apic_routing(void)
 {
-	if (uv_system_type == UV_NON_UNIQUE_APIC)
-		genapic = &apic_x2apic_uv_x;
-	else
-#ifdef CONFIG_ACPI
-	/*
-	 * Quirk: some x86_64 machines can only use physical APIC mode
-	 * regardless of how many processors are present (x86_64 ES7000
-	 * is an example).
-	 */
-	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
-			(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
-		genapic = &apic_physflat;
-	else
-#endif
-
-	if (max_physical_apicid < 8)
-		genapic = &apic_flat;
-	else
-		genapic = &apic_physflat;
+	if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
+		if (!intr_remapping_enabled)
+			genapic = &apic_flat;
+	}
 
-	printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+	if (genapic == &apic_flat) {
+		if (max_physical_apicid >= 8)
+			genapic = &apic_physflat;
+		printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+	}
 }
 
 /* Same for both flat and physical. */
 
-void send_IPI_self(int vector)
+void apic_send_IPI_self(int vector)
 {
 	__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
 }
 
 int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	if (!strcmp(oem_id, "SGI")) {
-		if (!strcmp(oem_table_id, "UVL"))
-			uv_system_type = UV_LEGACY_APIC;
-		else if (!strcmp(oem_table_id, "UVX"))
-			uv_system_type = UV_X2APIC;
-		else if (!strcmp(oem_table_id, "UVH"))
-			uv_system_type = UV_NON_UNIQUE_APIC;
+	int i;
+
+	for (i = 0; apic_probe[i]; ++i) {
+		if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+			genapic = apic_probe[i];
+			printk(KERN_INFO "Setting APIC routing to %s.\n",
+				genapic->name);
+			return 1;
+		}
 	}
 	return 0;
 }
-
-unsigned int read_apic_id(void)
-{
-	unsigned int id;
-
-	WARN_ON(preemptible() && num_online_cpus() > 1);
-	id = apic_read(APIC_ID);
-	if (uv_system_type >= UV_X2APIC)
-		id  |= __get_cpu_var(x2apic_extra_bits);
-	return id;
-}
-
-enum uv_system_type get_uv_system_type(void)
-{
-	return uv_system_type;
-}
-
-int is_uv_system(void)
-{
-	return uv_system_type != UV_NONE;
-}
-EXPORT_SYMBOL_GPL(is_uv_system);

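The shape of the change above: instead of one function that knows every APIC mode, each genapic now carries its own acpi_madt_oem_check() and the generic acpi_madt_oem_check() walks apic_probe[] until the first driver claims the platform; apic_flat stays the compile-time default and is deliberately absent from the table, with setup_apic_routing() handling the flat/physflat demotion afterwards. Reduced to a sketch with hypothetical names:

	/* Sketch of the first-match probe table (names are made up). */
	struct mode {
		const char *name;
		int (*probe)(char *oem_id, char *oem_table_id);
	};

	static struct mode *modes[] = { /* most specific first */ NULL };
	static struct mode *active;	/* default assigned at boot */

	static int pick_mode(char *oem_id, char *oem_table_id)
	{
		int i;

		for (i = 0; modes[i]; i++) {
			if (modes[i]->probe(oem_id, oem_table_id)) {
				active = modes[i];
				return 1;
			}
		}
		return 0;
	}

(Worth a follow-up: the extern above reads apic_x2xpic_uv_x, while the table entry and the definition in genx2apic_uv_x.c spell it apic_x2apic_uv_x.)
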
+ 61 - 1
arch/x86/kernel/genapic_flat_64.c

@@ -15,9 +15,20 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
+#include <mach_apicdef.h>
+
+#ifdef CONFIG_ACPI
+#include <acpi/acpi_bus.h>
+#endif
+
+static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return 1;
+}
 
 static cpumask_t flat_target_cpus(void)
 {
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = (((x)>>24) & 0xFFu);
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xFFu)<<24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	unsigned int id;
+
+	id = get_apic_id(apic_read(APIC_ID));
+	return id;
+}
+
 static int flat_apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
 }
 
 static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
 
 struct genapic apic_flat =  {
 	.name = "flat",
+	.acpi_madt_oem_check = flat_acpi_madt_oem_check,
 	.int_delivery_mode = dest_LowestPrio,
 	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
 	.target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat =  {
 	.send_IPI_all = flat_send_IPI_all,
 	.send_IPI_allbutself = flat_send_IPI_allbutself,
 	.send_IPI_mask = flat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
 
 /*
@@ -130,6 +170,21 @@ struct genapic apic_flat =  {
  * We cannot use logical delivery in this case because the mask
  * overflows, so use physical mode.
  */
+static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * Quirk: some x86_64 machines can only use physical APIC mode
+	 * regardless of how many processors are present (x86_64 ES7000
+	 * is an example).
+	 */
+	if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
+		return 1;
+#endif
+
+	return 0;
+}
 
 static cpumask_t physflat_target_cpus(void)
 {
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
 
 struct genapic apic_physflat =  {
 	.name = "physical flat",
+	.acpi_madt_oem_check = physflat_acpi_madt_oem_check,
 	.int_delivery_mode = dest_Fixed,
 	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
 	.target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat =  {
 	.send_IPI_all = physflat_send_IPI_all,
 	.send_IPI_allbutself = physflat_send_IPI_allbutself,
 	.send_IPI_mask = physflat_send_IPI_mask,
+	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };

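In xAPIC mode the 8-bit physical APIC ID sits in bits 31:24 of the APIC_ID register, which is all the new get_apic_id()/set_apic_id()/apic_id_mask trio encodes; routing the conversion through genapic is what lets the x2APIC and UV variants below override it. A concrete round trip, as a sketch:

	/* Sketch: flat-mode ID packing checked on a concrete value. */
	static void apic_id_roundtrip_example(void)
	{
		unsigned int raw = 0x1d000000;		/* APIC_ID register image */
		unsigned int id = (raw >> 24) & 0xFFu;	/* get_apic_id(): 0x1d */

		BUG_ON(((unsigned long)(id & 0xFFu) << 24) != raw); /* set_apic_id() */
	}
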
+ 164 - 0
arch/x86/kernel/genx2apic_cluster.c

@@ -0,0 +1,164 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (cpu_has_x2apic)
+		return 1;
+
+	return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+/*
+ * for now each logical cpu is in its own vector allocation domain.
+ */
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+/*
+ * for now, we send the IPI's one by one in the cpumask.
+ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
+ * at once. We have 16 cpu's in a cluster. This will minimize IPI register
+ * writes.
+ */
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+				       vector, APIC_DEST_LOGICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_logical_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+static void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+static void init_x2apic_ldr(void)
+{
+	int cpu = smp_processor_id();
+
+	per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
+	return;
+}
+
+struct genapic apic_x2apic_cluster = {
+	.name = "cluster x2apic",
+	.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+	.int_delivery_mode = dest_LowestPrio,
+	.int_dest_mode = (APIC_DEST_LOGICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};

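Note: in x2APIC mode the ICR is a single 64-bit register with the 32-bit destination ID in its upper half, so __x2apic_send_IPI_dest() can hand the logical ID cached in x86_cpu_to_logical_apicid straight to x2apic_icr_write() with no separate ICR2 write and no delivery-status polling. Assuming the architectural MSR layout (ICR at MSR 0x830 in the x2APIC register space), that helper amounts to something like:

	/* Sketch, not the in-tree definition: one wrmsr sends the IPI. */
	static inline void x2apic_icr_write_sketch(u32 low, u32 dest_id)
	{
		wrmsr(0x830, low, dest_id);	/* x2APIC ICR MSR */
	}
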
+ 159 - 0
arch/x86/kernel/genx2apic_phys.c

@@ -0,0 +1,159 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+
+static int x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+	x2apic_phys = 1;
+	return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (cpu_has_x2apic && x2apic_phys)
+		return 1;
+
+	return 0;
+}
+
+/* Start with all IRQs pointing to boot CPU.  IRQ balancing will shift them. */
+
+static cpumask_t x2apic_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static cpumask_t x2apic_vector_allocation_domain(int cpu)
+{
+	cpumask_t domain = CPU_MASK_NONE;
+	cpu_set(cpu, domain);
+	return domain;
+}
+
+static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
+				   unsigned int dest)
+{
+	unsigned long cfg;
+
+	cfg = __prepare_ICR(0, vector, dest);
+
+	/*
+	 * send the IPI.
+	 */
+	x2apic_icr_write(cfg, apicid);
+}
+
+static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
+{
+	unsigned long flags;
+	unsigned long query_cpu;
+
+	local_irq_save(flags);
+	for_each_cpu_mask(query_cpu, mask) {
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+				       vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	cpumask_t mask = cpu_online_map;
+
+	cpu_clear(smp_processor_id(), mask);
+
+	if (!cpus_empty(mask))
+		x2apic_send_IPI_mask(mask, vector);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	x2apic_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int x2apic_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return per_cpu(x86_cpu_to_apicid, cpu);
+	else
+		return BAD_APICID;
+}
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
+static unsigned int x2apic_read_id(void)
+{
+	return apic_read(APIC_ID);
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+	return x2apic_read_id() >> index_msb;
+}
+
+void x2apic_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+void init_x2apic_ldr(void)
+{
+	return;
+}
+
+struct genapic apic_x2apic_phys = {
+	.name = "physical x2apic",
+	.acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
+	.int_delivery_mode = dest_Fixed,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.target_cpus = x2apic_target_cpus,
+	.vector_allocation_domain = x2apic_vector_allocation_domain,
+	.apic_id_registered = x2apic_apic_id_registered,
+	.init_apic_ldr = init_x2apic_ldr,
+	.send_IPI_all = x2apic_send_IPI_all,
+	.send_IPI_allbutself = x2apic_send_IPI_allbutself,
+	.send_IPI_mask = x2apic_send_IPI_mask,
+	.send_IPI_self = x2apic_send_IPI_self,
+	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
+};

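Note: the physical flavour is opt-in; x2apic_acpi_madt_oem_check() here only claims the system when the CPU supports x2APIC and the user booted with x2apic_phys on the kernel command line (the early_param() above). Otherwise the cluster variant, whose check needs only cpu_has_x2apic, picks the system up.
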
+ 67 - 2
arch/x86/kernel/genx2apic_uv_x.c

@@ -12,12 +12,12 @@
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/string.h>
-#include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
+#include <linux/hardirq.h>
 #include <asm/smp.h>
 #include <asm/ipi.h>
 #include <asm/genapic.h>
@@ -26,6 +26,35 @@
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/bios.h>
 
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+static enum uv_system_type uv_system_type;
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strcmp(oem_id, "SGI")) {
+		if (!strcmp(oem_table_id, "UVL"))
+			uv_system_type = UV_LEGACY_APIC;
+		else if (!strcmp(oem_table_id, "UVX"))
+			uv_system_type = UV_X2APIC;
+		else if (!strcmp(oem_table_id, "UVH")) {
+			uv_system_type = UV_NON_UNIQUE_APIC;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+	return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+	return uv_system_type != UV_NONE;
+}
+
 DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
 
@@ -123,6 +152,10 @@ static int uv_apic_id_registered(void)
 	return 1;
 }
 
+static void uv_init_apic_ldr(void)
+{
+}
+
 static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 {
 	int cpu;
@@ -138,9 +171,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
 		return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	WARN_ON(preemptible() && num_online_cpus() > 1);
+	id = x | __get_cpu_var(x2apic_extra_bits);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	/* maskout x2apic_extra_bits ? */
+	x = id;
+	return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+	return get_apic_id(apic_read(APIC_ID));
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
-	return GET_APIC_ID(read_apic_id()) >> index_msb;
+	return uv_read_apic_id() >> index_msb;
 }
 
 #ifdef ZZZ		/* Needs x2apic patch */
@@ -152,17 +210,22 @@ static void uv_send_IPI_self(int vector)
 
 struct genapic apic_x2apic_uv_x = {
 	.name = "UV large system",
+	.acpi_madt_oem_check = uv_acpi_madt_oem_check,
 	.int_delivery_mode = dest_Fixed,
 	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
 	.target_cpus = uv_target_cpus,
 	.vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
 	.apic_id_registered = uv_apic_id_registered,
+	.init_apic_ldr = uv_init_apic_ldr,
 	.send_IPI_all = uv_send_IPI_all,
 	.send_IPI_allbutself = uv_send_IPI_allbutself,
 	.send_IPI_mask = uv_send_IPI_mask,
 	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +464,5 @@ void __cpuinit uv_cpu_init(void)
 	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
 		set_x2apic_extra_bits(uv_hub_info->pnode);
 }
+
+

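Note: on UV the raw APIC_ID register contents are not system-unique, so get_apic_id() ORs in the per-cpu x2apic_extra_bits that uv_cpu_init() derives from the hub's pnode. Moving that variable, the SGI OEM-table check and the uv_system_type accessors out of genapic_64.c makes the UV driver self-contained for the new probe table.
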
+ 135 - 19
arch/x86/kernel/i387.c

@@ -21,9 +21,12 @@
 # include <asm/sigcontext32.h>
 # include <asm/user32.h>
 #else
-# define save_i387_ia32		save_i387
-# define restore_i387_ia32	restore_i387
+# define save_i387_xstate_ia32		save_i387_xstate
+# define restore_i387_xstate_ia32	restore_i387_xstate
 # define _fpstate_ia32		_fpstate
+# define _xstate_ia32		_xstate
+# define sig_xstate_ia32_size   sig_xstate_size
+# define fx_sw_reserved_ia32	fx_sw_reserved
 # define user_i387_ia32_struct	user_i387_struct
 # define user32_fxsr_struct	user_fxsr_struct
 #endif
@@ -36,6 +39,7 @@
 
 static unsigned int		mxcsr_feature_mask __read_mostly = 0xffffffffu;
 unsigned int xstate_size;
+unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
 static struct i387_fxsave_struct fx_scratch __cpuinitdata;
 
 void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
 		return;
 	}
 
+	if (cpu_has_xsave) {
+		xsave_cntxt_init();
+		return;
+	}
+
 	if (cpu_has_fxsr)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 #ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
 
 	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
 
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+	xsave_init();
+
 	mxcsr_feature_mask_init();
 	/* clean state in init */
-	current_thread_info()->status = 0;
+	if (cpu_has_xsave)
+		current_thread_info()->status = TS_XSAVE;
+	else
+		current_thread_info()->status = 0;
 	clear_used_math();
 }
 #endif	/* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
 	 */
 	target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 
+	/*
+	 * update the header bits in the xsave header, indicating the
+	 * presence of FP and SSE state.
+	 */
+	if (cpu_has_xsave)
+		target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
 	return ret;
 }
 
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
 	if (!ret)
 		convert_to_fxsr(target, &env);
 
+	/*
+	 * update the header bit in the xsave header, indicating the
+	 * presence of FP.
+	 */
+	if (cpu_has_xsave)
+		target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
 	return ret;
 }
 
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
 	struct task_struct *tsk = current;
 	struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
 
-	unlazy_fpu(tsk);
 	fp->status = fp->swd;
 	if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
 		return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	struct user_i387_ia32_struct env;
 	int err = 0;
 
-	unlazy_fpu(tsk);
-
 	convert_from_fxsr(&env, tsk);
 	if (__copy_to_user(buf, &env, sizeof(env)))
 		return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	if (err)
 		return -1;
 
-	if (__copy_to_user(&buf->_fxsr_env[0], fx,
-			   sizeof(struct i387_fxsave_struct)))
+	if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
 		return -1;
 	return 1;
 }
 
-int save_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int save_i387_xsave(void __user *buf)
+{
+	struct _fpstate_ia32 __user *fx = buf;
+	int err = 0;
+
+	if (save_i387_fxsave(fx) < 0)
+		return -1;
+
+	err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
+			     sizeof(struct _fpx_sw_bytes));
+	err |= __put_user(FP_XSTATE_MAGIC2,
+			  (__u32 __user *) (buf + sig_xstate_ia32_size
+					    - FP_XSTATE_MAGIC2_SIZE));
+	if (err)
+		return -1;
+
+	return 1;
+}
+
+int save_i387_xstate_ia32(void __user *buf)
 {
+	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
+	struct task_struct *tsk = current;
+
 	if (!used_math())
 		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
+		return -EACCES;
 	/*
 	 * This will cause a "finit" to be triggered by the next
 	 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
 	if (!HAVE_HWFP) {
 		return fpregs_soft_get(current, NULL,
 				       0, sizeof(struct user_i387_ia32_struct),
-				       NULL, buf) ? -1 : 1;
+				       NULL, fp) ? -1 : 1;
 	}
 
+	unlazy_fpu(tsk);
+
+	if (cpu_has_xsave)
+		return save_i387_xsave(fp);
 	if (cpu_has_fxsr)
-		return save_i387_fxsave(buf);
+		return save_i387_fxsave(fp);
 	else
-		return save_i387_fsave(buf);
+		return save_i387_fsave(fp);
 }
 
 static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
 				sizeof(struct i387_fsave_struct));
 }
 
-static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
+static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
+			       unsigned int size)
 {
 	struct task_struct *tsk = current;
 	struct user_i387_ia32_struct env;
 	int err;
 
 	err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
-			       sizeof(struct i387_fxsave_struct));
+			       size);
 	/* mxcsr reserved bits must be masked to zero for security reasons */
 	tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
 	if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
 	return 0;
 }
 
-int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
+static int restore_i387_xsave(void __user *buf)
+{
+	struct _fpx_sw_bytes fx_sw_user;
+	struct _fpstate_ia32 __user *fx_user =
+			((struct _fpstate_ia32 __user *) buf);
+	struct i387_fxsave_struct __user *fx =
+		(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
+	struct xsave_hdr_struct *xsave_hdr =
+				&current->thread.xstate->xsave.xsave_hdr;
+	u64 mask;
+	int err;
+
+	if (check_for_xstate(fx, buf, &fx_sw_user))
+		goto fx_only;
+
+	mask = fx_sw_user.xstate_bv;
+
+	err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
+
+	xsave_hdr->xstate_bv &= pcntxt_mask;
+	/*
+	 * These bits must be zero.
+	 */
+	xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
+
+	/*
+	 * Init the state that is not present in the memory layout
+	 * and enabled by the OS.
+	 */
+	mask = ~(pcntxt_mask & ~mask);
+	xsave_hdr->xstate_bv &= mask;
+
+	return err;
+fx_only:
+	/*
+	 * Couldn't find the extended state information in the memory
+	 * layout. Restore the FP/SSE and init the other extended state
+	 * enabled by the OS.
+	 */
+	xsave_hdr->xstate_bv = XSTATE_FPSSE;
+	return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
+}
+
+int restore_i387_xstate_ia32(void __user *buf)
 {
 	int err;
 	struct task_struct *tsk = current;
+	struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
 
 	if (HAVE_HWFP)
 		clear_fpu(tsk);
 
+	if (!buf) {
+		if (used_math()) {
+			clear_fpu(tsk);
+			clear_used_math();
+		}
+
+		return 0;
+	} else
+		if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
+			return -EACCES;
+
 	if (!used_math()) {
 		err = init_fpu(tsk);
 		if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
 	}
 
 	if (HAVE_HWFP) {
-		if (cpu_has_fxsr)
-			err = restore_i387_fxsave(buf);
+		if (cpu_has_xsave)
+			err = restore_i387_xsave(buf);
+		else if (cpu_has_fxsr)
+			err = restore_i387_fxsave(fp, sizeof(struct
+							   i387_fxsave_struct));
 		else
-			err = restore_i387_fsave(buf);
+			err = restore_i387_fsave(fp);
 	} else {
 		err = fpregs_soft_set(current, NULL,
 				      0, sizeof(struct user_i387_ia32_struct),
-				      NULL, buf) != 0;
+				      NULL, fp) != 0;
 	}
 	set_used_math();
 

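The signal-frame convention these hunks introduce for the 32-bit case: the FP area grows from sizeof(struct _fpstate_ia32) to sig_xstate_ia32_size, the otherwise-unused sw_reserved bytes of the fxsave image carry a _fpx_sw_bytes descriptor (including the xstate_bv mask and xstate_size consumed above), and FP_XSTATE_MAGIC2 is planted in the frame's last four bytes so restore_i387_xsave() can tell a genuine extended frame from a legacy fxsave-only one (the fx_only fallback). Roughly, as a sketch (field names beyond xstate_bv/xstate_size are paraphrased):

	/*
	 * 32-bit sigframe FP area after this change:
	 *
	 *   legacy i387 fields
	 *   _fxsr_env[]: fxsave/xsave image
	 *       sw_reserved -> struct _fpx_sw_bytes { ..., xstate_bv, xstate_size }
	 *   extended state records
	 *   FP_XSTATE_MAGIC2 in the final 4 bytes of sig_xstate_ia32_size
	 */
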
+ 24 - 0
arch/x86/kernel/i8259.c

@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
 
 device_initcall(i8259A_init_sysfs);
 
+void mask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void unmask_8259A(void)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&i8259A_lock, flags);
+
+	outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
+	outb(cached_slave_mask, PIC_SLAVE_IMR);	  /* restore slave IRQ mask */
+
+	spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
 void init_8259A(int auto_eoi)
 {
 	unsigned long flags;

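mask_8259A()/unmask_8259A() simply write the master and slave Interrupt Mask Registers under the i8259A lock, giving callers a way to silence every legacy PIC line around a delivery-mode change (the interrupt-remapping bring-up elsewhere in this series looks like the intended user). The expected bracketing, sketched:

	mask_8259A();		/* 0xff into both IMRs */
	/* ... reconfigure interrupt delivery ... */
	unmask_8259A();		/* restore the cached masks */
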
+ 4 - 6
arch/x86/kernel/io_apic_32.c

@@ -46,6 +46,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/setup.h>
 
 #include <mach_apic.h>
 #include <mach_apicdef.h>
@@ -1490,7 +1491,7 @@ void /*__init*/ print_local_APIC(void *dummy)
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(read_apic_id()));
+			GET_APIC_ID(v));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1698,8 +1699,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest =
-					GET_APIC_ID(read_apic_id());
+		entry.dest.physical.physical_dest = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1725,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	unsigned char old_id;
 	unsigned long flags;
 
-#ifdef CONFIG_X86_NUMAQ
-	if (found_numaq)
+	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
 		return;
-#endif
 
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have

+ 572 - 36
arch/x86/kernel/io_apic_64.c

@@ -37,6 +37,7 @@
 #include <acpi/acpi_bus.h>
 #endif
 #include <linux/bootmem.h>
+#include <linux/dmar.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -49,6 +50,7 @@
 #include <asm/nmi.h>
 #include <asm/msidef.h>
 #include <asm/hypertransport.h>
+#include <asm/irq_remapping.h>
 
 #include <mach_ipi.h>
 #include <mach_apic.h>
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 
+/* I/O APIC RTE contents at the OS boot up */
+struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
+
 /* I/O APIC entries */
 struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
 		pin = entry->pin;
 		if (pin == -1)
 			break;
-		io_apic_write(apic, 0x11 + pin*2, dest);
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
 		reg = io_apic_read(apic, 0x10 + pin*2);
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 
+/*
+ * Saves and masks all the unmasked IO-APIC RTE's
+ */
+int save_mask_IO_APIC_setup(void)
+{
+	union IO_APIC_reg_01 reg_01;
+	unsigned long flags;
+	int apic, pin;
+
+	/*
+	 * The number of IO-APIC IRQ registers (== #pins):
+	 */
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		reg_01.raw = io_apic_read(apic, 1);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++) {
+		early_ioapic_entries[apic] =
+			kzalloc(sizeof(struct IO_APIC_route_entry) *
+				nr_ioapic_registers[apic], GFP_KERNEL);
+		if (!early_ioapic_entries[apic])
+			return -ENOMEM;
+	}
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+			struct IO_APIC_route_entry entry;
+
+			entry = early_ioapic_entries[apic][pin] =
+				ioapic_read_entry(apic, pin);
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	return 0;
+}
+
+void restore_IO_APIC_setup(void)
+{
+	int apic, pin;
+
+	for (apic = 0; apic < nr_ioapics; apic++)
+		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+			ioapic_write_entry(apic, pin,
+					   early_ioapic_entries[apic][pin]);
+}
+
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
+{
+	/*
+	 * for now plain restore of previous settings.
+	 * TBD: In the case of OS enabling interrupt-remapping,
+	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
+	 * table entries. for now, do a plain restore, and wait for
+	 * the setup_IO_APIC_irqs() to do proper initialization.
+	 */
+	restore_IO_APIC_setup();
+}
+
 int skip_ioapic_setup;
 int ioapic_force;
 
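The save/restore pair above exists so the interrupt-remapping enable path can flip hardware modes without a stray IO-APIC interrupt sneaking through: save_mask_IO_APIC_setup() snapshots every RTE into early_ioapic_entries[] and masks whatever was unmasked, and restore_IO_APIC_setup() (plus, for now, reinit_intr_remapped_IO_APIC()) plays the snapshot back. The intended sequence, sketched with a hypothetical stand-in for the real enable routine:

	if (save_mask_IO_APIC_setup())
		return -ENOMEM;			/* snapshot + mask all RTEs */

	if (try_to_enable_remapping() < 0)	/* hypothetical caller */
		restore_IO_APIC_setup();	/* failed: put RTEs back */
	else
		reinit_intr_remapped_IO_APIC(1);/* currently also a plain restore */
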
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
 }
 
 static struct irq_chip ioapic_chip;
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip;
+#endif
 
 static void ioapic_register_intr(int irq, unsigned long trigger)
 {
-	if (trigger) {
+	if (trigger)
 		irq_desc[irq].status |= IRQ_LEVEL;
-		set_irq_chip_and_handler_name(irq, &ioapic_chip,
-					      handle_fasteoi_irq, "fasteoi");
-	} else {
+	else
 		irq_desc[irq].status &= ~IRQ_LEVEL;
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
+		if (trigger)
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_fasteoi_irq,
+						     "fasteoi");
+		else
+			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
+						      handle_edge_irq, "edge");
+		return;
+	}
+#endif
+	if (trigger)
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq,
+					      "fasteoi");
+	else
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_edge_irq, "edge");
+}
+
+static int setup_ioapic_entry(int apic, int irq,
+			      struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int trigger,
+			      int polarity, int vector)
+{
+	/*
+	 * add it to the IO-APIC irq-routing table:
+	 */
+	memset(entry,0,sizeof(*entry));
+
+#ifdef CONFIG_INTR_REMAP
+	if (intr_remapping_enabled) {
+		struct intel_iommu *iommu = map_ioapic_to_ir(apic);
+		struct irte irte;
+		struct IR_IO_APIC_route_entry *ir_entry =
+			(struct IR_IO_APIC_route_entry *) entry;
+		int index;
+
+		if (!iommu)
+			panic("No mapping iommu for ioapic %d\n", apic);
+
+		index = alloc_irte(iommu, irq, 1);
+		if (index < 0)
+			panic("Failed to allocate IRTE for ioapic %d\n", apic);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = trigger;
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = vector;
+		irte.dest_id = IRTE_DEST(destination);
+
+		modify_irte(irq, &irte);
+
+		ir_entry->index2 = (index >> 15) & 0x1;
+		ir_entry->zero = 0;
+		ir_entry->format = 1;
+		ir_entry->index = (index & 0x7fff);
+	} else
+#endif
+	{
+		entry->delivery_mode = INT_DELIVERY_MODE;
+		entry->dest_mode = INT_DEST_MODE;
+		entry->dest = destination;
 	}
 	}
+
+	entry->mask = 0;				/* enable IRQ */
+	entry->trigger = trigger;
+	entry->polarity = polarity;
+	entry->vector = vector;
+
+	/* Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (trigger)
+		entry->mask = 1;
+	return 0;
 }
 
 static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
 		    apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
 		    irq, trigger, polarity);
 
-	/*
-	 * add it to the IO-APIC irq-routing table:
-	 */
-	memset(&entry,0,sizeof(entry));
 
-	entry.delivery_mode = INT_DELIVERY_MODE;
-	entry.dest_mode = INT_DEST_MODE;
-	entry.dest = cpu_mask_to_apicid(mask);
-	entry.mask = 0;				/* enable IRQ */
-	entry.trigger = trigger;
-	entry.polarity = polarity;
-	entry.vector = cfg->vector;
-
-	/* Mask level triggered irqs.
-	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
-	 */
-	if (trigger)
-		entry.mask = 1;
+	if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
+			       cpu_mask_to_apicid(mask), trigger, polarity,
+			       cfg->vector)) {
+		printk(KERN_ERR "Failed to set up ioapic entry for ioapic %d, pin %d\n",
+		       mp_ioapics[apic].mp_apicid, pin);
+		__clear_irq_vector(irq);
+		return;
+	}
 
 	ioapic_register_intr(irq, trigger);
 	if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
 {
 	struct IO_APIC_route_entry entry;
 
+	if (intr_remapping_enabled)
+		return;
+
 	memset(&entry, 0, sizeof(entry));
 
 	/*
@@ -1090,6 +1237,7 @@ static __apicdebuginit void print_APIC_bitfield (int base)
 void __apicdebuginit print_local_APIC(void * dummy)
 {
 	unsigned int v, ver, maxlvt;
+	unsigned long icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
@@ -1097,7 +1245,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
 	ver = GET_APIC_VERSION(v);
@@ -1133,10 +1281,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
 	v = apic_read(APIC_ESR);
 	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
 
-	v = apic_read(APIC_ICR);
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
-	v = apic_read(APIC_ICR2);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+	icr = apic_icr_read();
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
 
 	v = apic_read(APIC_LVTT);
 	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1291,7 +1438,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest          = GET_APIC_ID(read_apic_id());
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1544,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+
+#ifdef CONFIG_INTR_REMAP
+static void ir_irq_migration(struct work_struct *work);
+
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
+
+/*
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
+ *
+ * For edge triggered, irq migration is a simple atomic update (of vector
+ * and cpu destination) of the IRTE and a flush of the hardware cache.
+ *
+ * For level triggered, we need to modify the io-apic RTE as well with the
+ * updated vector information, along with modifying the IRTE with the vector
+ * and destination. So irq migration for level triggered is a little bit more
+ * complex compared to edge triggered migration. But the good news is, we use
+ * the same algorithm for level triggered migration as we have today, the
+ * only difference being that we now initiate the irq migration from process
+ * context instead of interrupt context.
+ *
+ * In the future, when we do a directed EOI (combined with cpu EOI broadcast
+ * suppression) to the IO-APIC, level triggered irq migration will also be
+ * as simple as edge triggered migration and we can do the irq migration
+ * with a simple atomic update to the IO-APIC RTE.
+ */
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct irq_desc *desc = irq_desc + irq;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+	int modify_ioapic_rte = desc->status & IRQ_LEVEL;
+	unsigned int dest;
+	unsigned long flags;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	if (modify_ioapic_rte) {
+		spin_lock_irqsave(&ioapic_lock, flags);
+		__target_IO_APIC_irq(irq, dest, cfg->vector);
+		spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * Modify the IRTE and flush the interrupt entry cache.
+	 */
+	modify_irte(irq, &irte);
+
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+
+static int migrate_irq_remapped_level(int irq)
+{
+	int ret = -1;
+
+	mask_IO_APIC_irq(irq);
+
+	if (io_apic_level_ack_pending(irq)) {
+		/*
+		 * Interrupt in progress. Migrating the irq now will change
+		 * the vector information in the IO-APIC RTE and that will
+		 * confuse the EOI broadcast performed by the cpu.
+		 * So, delay the irq migration until the next attempt.
+		 */
+		schedule_delayed_work(&ir_migration_work, 1);
+		goto unmask;
+	}
+
+	/* everything is clear. we have right of way */
+	migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
+
+	ret = 0;
+	irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
+	cpus_clear(irq_desc[irq].pending_mask);
+
+unmask:
+	unmask_IO_APIC_irq(irq);
+	return ret;
+}
+
+static void ir_irq_migration(struct work_struct *work)
+{
+	int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		struct irq_desc *desc = irq_desc + irq;
+		if (desc->status & IRQ_MOVE_PENDING) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&desc->lock, flags);
+			if (!desc->chip->set_affinity ||
+			    !(desc->status & IRQ_MOVE_PENDING)) {
+				desc->status &= ~IRQ_MOVE_PENDING;
+				spin_unlock_irqrestore(&desc->lock, flags);
+				continue;
+			}
+
+			desc->chip->set_affinity(irq,
+						 irq_desc[irq].pending_mask);
+			spin_unlock_irqrestore(&desc->lock, flags);
+		}
+	}
+}
+
+/*
+ * Migrates the IRQ destination in the process context.
+ */
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+	if (irq_desc[irq].status & IRQ_LEVEL) {
+		irq_desc[irq].status |= IRQ_MOVE_PENDING;
+		irq_desc[irq].pending_mask = mask;
+		migrate_irq_remapped_level(irq);
+		return;
+	}
+
+	migrate_ioapic_irq(irq, mask);
+}
+#endif
+
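The level-triggered path above defers under contention rather than racing the hardware. A minimal standalone sketch of that pattern -- not part of the patch, with check_ack_pending() and do_migrate() as hypothetical stand-ins for io_apic_level_ack_pending() and migrate_ioapic_irq():

    #include <linux/workqueue.h>

    static void migration_worker(struct work_struct *work);
    static DECLARE_DELAYED_WORK(migration_work, migration_worker);

    static int check_ack_pending(void) { return 0; }	/* stand-in */
    static void do_migrate(void) { }			/* stand-in */

    static int try_migrate(void)
    {
    	if (check_ack_pending()) {
    		/* hardware still busy: retry from process context later */
    		schedule_delayed_work(&migration_work, 1);
    		return -1;
    	}
    	do_migrate();
    	return 0;
    }

    static void migration_worker(struct work_struct *work)
    {
    	try_migrate();
    }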
 asmlinkage void smp_irq_move_cleanup_interrupt(void)
 {
 	unsigned vector, me;
@@ -1453,6 +1741,17 @@ static void irq_complete_move(unsigned int irq)
 #else
 static inline void irq_complete_move(unsigned int irq) {}
 #endif
+#ifdef CONFIG_INTR_REMAP
+static void ack_x2apic_level(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+
+static void ack_x2apic_edge(unsigned int irq)
+{
+	ack_x2APIC_irq();
+}
+#endif
 
 static void ack_apic_edge(unsigned int irq)
 {
@@ -1527,6 +1826,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip ir_ioapic_chip __read_mostly = {
+	.name 		= "IR-IO-APIC",
+	.startup 	= startup_ioapic_irq,
+	.mask	 	= mask_IO_APIC_irq,
+	.unmask	 	= unmask_IO_APIC_irq,
+	.ack 		= ack_x2apic_edge,
+	.eoi 		= ack_x2apic_level,
+#ifdef CONFIG_SMP
+	.set_affinity 	= set_ir_ioapic_affinity_irq,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+#endif
+
 static inline void init_IO_APIC_traps(void)
 {
 	int irq;
@@ -1712,6 +2026,8 @@ static inline void __init check_timer(void)
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
+		if (intr_remapping_enabled)
+			panic("BIOS bug: timer not connected to IO-APIC");
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -1738,6 +2054,8 @@ static inline void __init check_timer(void)
 				clear_IO_APIC_pin(0, pin1);
 			goto out;
 		}
+		if (intr_remapping_enabled)
+			panic("timer doesn't work through Interrupt-remapped IO-APIC");
 		clear_IO_APIC_pin(apic1, pin1);
 		if (!no_pin1)
 			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1977,6 +2295,9 @@ void destroy_irq(unsigned int irq)
 
 	dynamic_irq_cleanup(irq);
 
+#ifdef CONFIG_INTR_REMAP
+	free_irte(irq);
+#endif
 	spin_lock_irqsave(&vector_lock, flags);
 	__clear_irq_vector(irq);
 	spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,10 +2316,41 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 
 	tmp = TARGET_CPUS;
 	err = assign_irq_vector(irq, tmp);
-	if (!err) {
-		cpus_and(tmp, cfg->domain, tmp);
-		dest = cpu_mask_to_apicid(tmp);
+	if (err)
+		return err;
+
+	cpus_and(tmp, cfg->domain, tmp);
+	dest = cpu_mask_to_apicid(tmp);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irte irte;
+		int ir_index;
+		u16 sub_handle;
+
+		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
+		BUG_ON(ir_index == -1);
+
+		memset(&irte, 0, sizeof(irte));
+
+		irte.present = 1;
+		irte.dst_mode = INT_DEST_MODE;
+		irte.trigger_mode = 0; /* edge */
+		irte.dlvry_mode = INT_DELIVERY_MODE;
+		irte.vector = cfg->vector;
+		irte.dest_id = IRTE_DEST(dest);
+
+		modify_irte(irq, &irte);
 
+		msg->address_hi = MSI_ADDR_BASE_HI;
+		msg->data = sub_handle;
+		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
+				  MSI_ADDR_IR_SHV |
+				  MSI_ADDR_IR_INDEX1(ir_index) |
+				  MSI_ADDR_IR_INDEX2(ir_index);
+	} else
+#endif
+	{
 		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_hi = MSI_ADDR_BASE_HI;
 		msg->address_lo =
 		msg->address_lo =
 			MSI_ADDR_BASE_LO |
 			MSI_ADDR_BASE_LO |
@@ -2049,6 +2401,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
 	write_msi_msg(irq, &msg);
 	irq_desc[irq].affinity = mask;
 }
+
+#ifdef CONFIG_INTR_REMAP
+/*
+ * Migrate the MSI irq to another cpumask. This migration is
+ * done in the process context using interrupt-remapping hardware.
+ */
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	unsigned int dest;
+	cpumask_t tmp, cleanup_mask;
+	struct irte irte;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (get_irte(irq, &irte))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	irte.vector = cfg->vector;
+	irte.dest_id = IRTE_DEST(dest);
+
+	/*
+	 * atomically update the IRTE with the new destination and vector.
+	 */
+	modify_irte(irq, &irte);
+
+	/*
+	 * After this point, all the interrupts will start arriving
+	 * at the new destination. So, time to cleanup the previous
+	 * vector allocation.
+	 */
+	if (cfg->move_in_progress) {
+		cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+		cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+		send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		cfg->move_in_progress = 0;
+	}
+
+	irq_desc[irq].affinity = mask;
+}
+#endif
 #endif /* CONFIG_SMP */
 
 /*
@@ -2066,26 +2467,157 @@ static struct irq_chip msi_chip = {
 	.retrigger	= ioapic_retrigger_irq,
 };
 
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+#ifdef CONFIG_INTR_REMAP
+static struct irq_chip msi_ir_chip = {
+	.name		= "IR-PCI-MSI",
+	.unmask		= unmask_msi_irq,
+	.mask		= mask_msi_irq,
+	.ack		= ack_x2apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity	= ir_set_msi_irq_affinity,
+#endif
+	.retrigger	= ioapic_retrigger_irq,
+};
+
+/*
+ * Map the PCI dev to the corresponding remapping hardware unit
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
+ * in it.
+ */
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
+{
+	struct intel_iommu *iommu;
+	int index;
+
+	iommu = map_dev_to_ir(dev);
+	if (!iommu) {
+		printk(KERN_ERR
+		       "Unable to map PCI %s to iommu\n", pci_name(dev));
+		return -ENOENT;
+	}
+
+	index = alloc_irte(iommu, irq, nvec);
+	if (index < 0) {
+		printk(KERN_ERR
+		       "Unable to allocate %d IRTEs for PCI %s\n", nvec,
+		       pci_name(dev));
+		return -ENOSPC;
+	}
+	return index;
+}
+#endif
+
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
 {
+	int ret;
 	struct msi_msg msg;
+
+	ret = msi_compose_msg(dev, irq, &msg);
+	if (ret < 0)
+		return ret;
+
+	set_irq_msi(irq, desc);
+	write_msi_msg(irq, &msg);
+
+#ifdef CONFIG_INTR_REMAP
+	if (irq_remapped(irq)) {
+		struct irq_desc *desc = irq_desc + irq;
+		/*
+		 * irq migration in process context
+		 */
+		desc->status |= IRQ_MOVE_PCNTXT;
+		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
+	} else
+#endif
+		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+
+	return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
 	int irq, ret;
+
 	irq = create_irq();
 	if (irq < 0)
 		return irq;
 
-	ret = msi_compose_msg(dev, irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+	if (!intr_remapping_enabled)
+		goto no_ir;
+
+	ret = msi_alloc_irte(dev, irq, 1);
+	if (ret < 0)
+		goto error;
+no_ir:
+#endif
+	ret = setup_msi_irq(dev, desc, irq);
 	if (ret < 0) {
 	if (ret < 0) {
 		destroy_irq(irq);
 		destroy_irq(irq);
 		return ret;
 		return ret;
 	}
 	}
+	return 0;
 
-	set_irq_msi(irq, desc);
-	write_msi_msg(irq, &msg);
+#ifdef CONFIG_INTR_REMAP
+error:
+	destroy_irq(irq);
+	return ret;
+#endif
+}
 
-	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	int irq, ret, sub_handle;
+	struct msi_desc *desc;
+#ifdef CONFIG_INTR_REMAP
+	struct intel_iommu *iommu = NULL;
+	int index = 0;
+#endif
 
 
+	sub_handle = 0;
+	list_for_each_entry(desc, &dev->msi_list, list) {
+		irq = create_irq();
+		if (irq < 0)
+			return irq;
+#ifdef CONFIG_INTR_REMAP
+		if (!intr_remapping_enabled)
+			goto no_ir;
+
+		if (!sub_handle) {
+			/*
+			 * allocate the consecutive block of IRTEs
+			 * for 'nvec'
+			 */
+			index = msi_alloc_irte(dev, irq, nvec);
+			if (index < 0) {
+				ret = index;
+				goto error;
+			}
+		} else {
+			iommu = map_dev_to_ir(dev);
+			if (!iommu) {
+				ret = -ENOENT;
+				goto error;
+			}
+			/*
+			 * set up the mapping between the irq and the IRTE
+			 * base index, with the sub_handle pointing to the
+			 * appropriate interrupt remap table entry.
+			 */
+			set_irte_irq(irq, iommu, index, sub_handle);
+		}
+no_ir:
+#endif
+		ret = setup_msi_irq(dev, desc, irq);
+		if (ret < 0)
+			goto error;
+		sub_handle++;
+	}
 	return 0;
 	return 0;
+error:
+	destroy_irq(irq);
+	return ret;
 }
 }
 
 void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2865,10 @@ void __init setup_ioapic_dest(void)
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
+#ifdef CONFIG_INTR_REMAP
+			else if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
 			else
 				set_ioapic_affinity_irq(irq, TARGET_CPUS);
 		}
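For reference, the remappable-format RTE programming done by setup_ioapic_entry() above boils down to packing a 16-bit IRTE index: bit 15 lands in the 1-bit index2 field, the low 15 bits in index, and format = 1 marks the entry as remapped. A standalone sketch, using a simplified stand-in for the kernel's struct IR_IO_APIC_route_entry:

    struct ir_rte_sketch {			/* simplified stand-in */
    	unsigned int index2 : 1;	/* bit 15 of the IRTE index */
    	unsigned int zero   : 1;	/* reserved, must be zero */
    	unsigned int format : 1;	/* 1 = remappable format */
    	unsigned int index  : 15;	/* low 15 bits of the IRTE index */
    };

    static void pack_irte_index(struct ir_rte_sketch *e, int index)
    {
    	e->index2 = (index >> 15) & 0x1;
    	e->zero   = 0;
    	e->format = 1;
    	e->index  = index & 0x7fff;
    }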

+ 1 - 0
arch/x86/kernel/ioport.c

@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, unsigned int base,

+ 2 - 1
arch/x86/kernel/ipi.c

@@ -20,6 +20,8 @@
 
 #ifdef CONFIG_X86_32
 #include <mach_apic.h>
+#include <mach_ipi.h>
+
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
 }
 
 /* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
 static int convert_apicid_to_cpu(int apic_id)
 {
 	int i;

+ 1 - 1
arch/x86/kernel/irq_32.c

@@ -325,7 +325,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 				per_cpu(irq_stat,j).irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ",
 			seq_printf(p, "%10u ",

+ 1 - 1
arch/x86/kernel/irq_64.c

@@ -129,7 +129,7 @@ int show_interrupts(struct seq_file *p, void *v)
 		seq_printf(p, "CAL: ");
 		seq_printf(p, "CAL: ");
 		for_each_online_cpu(j)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count);
-		seq_printf(p, "  function call interrupts\n");
+		seq_printf(p, "  Function call interrupts\n");
 		seq_printf(p, "TLB: ");
 		seq_printf(p, "TLB: ");
 		for_each_online_cpu(j)
 		for_each_online_cpu(j)
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);
 			seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count);

+ 1 - 0
arch/x86/kernel/ldt.c

@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)

+ 2 - 0
arch/x86/kernel/mpparse.c

@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
        generic_bigsmp_probe();
 #endif
 
+#ifdef CONFIG_X86_32
 	setup_apic_routing();
+#endif
 	if (!num_processors)
 		printk(KERN_ERR "MPTABLE: no processors registered!\n");
 	return num_processors;

+ 7 - 0
arch/x86/kernel/numaq_32.c

@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
 	}
 }
 
+static int __init numaq_setup_ioapic_ids(void)
+{
+	/* return 1 so the generic IO-APIC ID setup is skipped */
+	return 1;
+}
+
 static struct x86_quirks numaq_x86_quirks __initdata = {
 	.arch_pre_time_init	= numaq_pre_time_init,
 	.arch_time_init		= NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
 	.mpc_oem_bus_info	= mpc_oem_bus_info,
 	.mpc_oem_pci_bus	= mpc_oem_pci_bus,
 	.smp_read_mpc_oem	= smp_read_mpc_oem,
+	.setup_ioapic_ids	= numaq_setup_ioapic_ids,
 };
 
 void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,

+ 0 - 2
arch/x86/kernel/paravirt.c

@@ -373,8 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
 
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.apic_write = native_apic_write,
-	.apic_read = native_apic_read,
 	.setup_boot_clock = setup_boot_APIC_clock,
 	.setup_secondary_clock = setup_secondary_APIC_clock,
 	.startup_ipi_hook = paravirt_nop,

+ 1 - 1
arch/x86/kernel/paravirt_patch_32.c

@@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
 			start = start_##ops##_##x;		\
 			end = end_##ops##_##x;			\
 			goto patch_site
-	switch(type) {
+	switch (type) {
 		PATCH_SITE(pv_irq_ops, irq_disable);
 		PATCH_SITE(pv_irq_ops, irq_enable);
 		PATCH_SITE(pv_irq_ops, restore_fl);

+ 1 - 1
arch/x86/kernel/pci-dma.c

@@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void)
 	 * using 512M as goal
 	 */
 	align = 64ULL<<20;
-	size = round_up(dma32_bootmem_size, align);
+	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
 	if (dma32_bootmem_ptr)

+ 2 - 0
arch/x86/kernel/process_32.c

@@ -55,6 +55,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlbflush.h>
 #include <asm/cpu.h>
 #include <asm/cpu.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
+#include <asm/syscalls.h>
+#include <asm/smp.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 

+ 67 - 65
arch/x86/kernel/process_64.c

@@ -37,11 +37,11 @@
 #include <linux/kdebug.h>
 #include <linux/kdebug.h>
 #include <linux/tick.h>
 #include <linux/tick.h>
 #include <linux/prctl.h>
 #include <linux/prctl.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
 
 
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
 #include <asm/system.h>
-#include <asm/io.h>
 #include <asm/processor.h>
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
 #include <asm/mmu_context.h>
@@ -51,6 +51,7 @@
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/ia32.h>
 #include <asm/ia32.h>
 #include <asm/idle.h>
 #include <asm/idle.h>
+#include <asm/syscalls.h>
 
 
 asmlinkage extern void ret_from_fork(void);
 asmlinkage extern void ret_from_fork(void);
 
 
@@ -88,7 +89,7 @@ void exit_idle(void)
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
-#include <asm/nmi.h>
+#include <linux/nmi.h>
 /* We halt the CPU with physical CPU hotplug */
 static inline void play_dead(void)
 {
@@ -151,7 +152,7 @@ void cpu_idle(void)
 }
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs * regs)
+void __show_regs(struct pt_regs *regs)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -160,59 +161,61 @@ void __show_regs(struct pt_regs *regs)
 
 	printk("\n");
 	print_modules();
-	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+	printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
 		current->pid, current->comm, print_tainted(),
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
-	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+	printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
 	printk_address(regs->ip, 1);
-	printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->sp,
-		regs->flags);
-	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+	printk(KERN_INFO "RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss,
+			regs->sp, regs->flags);
+	printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
 	       regs->ax, regs->bx, regs->cx);
 	       regs->ax, regs->bx, regs->cx);
-	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+	printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
 	       regs->dx, regs->si, regs->di);
 	       regs->dx, regs->si, regs->di);
-	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+	printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
 	       regs->bp, regs->r8, regs->r9);
 	       regs->bp, regs->r8, regs->r9);
-	printk("R10: %016lx R11: %016lx R12: %016lx\n",
-	       regs->r10, regs->r11, regs->r12); 
-	printk("R13: %016lx R14: %016lx R15: %016lx\n",
-	       regs->r13, regs->r14, regs->r15); 
-
-	asm("movl %%ds,%0" : "=r" (ds)); 
-	asm("movl %%cs,%0" : "=r" (cs)); 
-	asm("movl %%es,%0" : "=r" (es)); 
+	printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+	       regs->r10, regs->r11, regs->r12);
+	printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+	       regs->r13, regs->r14, regs->r15);
+
+	asm("movl %%ds,%0" : "=r" (ds));
+	asm("movl %%cs,%0" : "=r" (cs));
+	asm("movl %%es,%0" : "=r" (es));
 	asm("movl %%fs,%0" : "=r" (fsindex));
 	asm("movl %%fs,%0" : "=r" (fsindex));
 	asm("movl %%gs,%0" : "=r" (gsindex));
 	asm("movl %%gs,%0" : "=r" (gsindex));
 
 
 	rdmsrl(MSR_FS_BASE, fs);
 	rdmsrl(MSR_FS_BASE, fs);
-	rdmsrl(MSR_GS_BASE, gs); 
-	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); 
+	rdmsrl(MSR_GS_BASE, gs);
+	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4();
 
-	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 
-	       fs,fsindex,gs,gsindex,shadowgs); 
-	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); 
-	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+	printk(KERN_INFO "FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+	       fs, fsindex, gs, gsindex, shadowgs);
+	printk(KERN_INFO "CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+			es, cr0);
+	printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+			cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
 	get_debugreg(d2, 2);
-	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+	printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
 	get_debugreg(d3, 3);
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+	printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
 }
 
 void show_regs(struct pt_regs *regs)
 {
-	printk("CPU %d:", smp_processor_id());
+	printk(KERN_INFO "CPU %d:", smp_processor_id());
 	__show_regs(regs);
 	show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
 }
@@ -313,10 +316,10 @@ void prepare_to_copy(struct task_struct *tsk)
 
 int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		unsigned long unused,
-	struct task_struct * p, struct pt_regs * regs)
+	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
-	struct pt_regs * childregs;
+	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
 	childregs = ((struct pt_regs *)
@@ -361,10 +364,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 		if (test_thread_flag(TIF_IA32))
 			err = do_set_thread_area(p, -1,
 				(struct user_desc __user *)childregs->si, 0);
-		else 			
-#endif	 
-			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); 
-		if (err) 
+		else
+#endif
+			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
+		if (err)
 			goto out;
 	}
 	err = 0;
@@ -543,7 +546,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	unsigned fsindex, gsindex;
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter>5)
+	if (next_p->fpu_counter > 5)
 		prefetch(next->xstate);
 		prefetch(next->xstate);
 
 
 	/*
 	/*
@@ -551,13 +554,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	load_sp0(tss, next);
 
-	/* 
+	/*
 	 * Switch DS and ES.
 	 * This won't pick up thread selector changes, but I guess that is ok.
 	 */
 	savesegment(es, prev->es);
 	if (unlikely(next->es | prev->es))
-		loadsegment(es, next->es); 
+		loadsegment(es, next->es);
 
 	savesegment(ds, prev->ds);
 	if (unlikely(next->ds | prev->ds))
@@ -583,7 +586,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	arch_leave_lazy_cpu_mode();
 
-	/* 
+	/*
 	 * Switch FS and GS.
 	 *
 	 * Segment register != 0 always requires a reload.  Also
@@ -592,13 +595,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	if (unlikely(fsindex | next->fsindex | prev->fs)) {
 		loadsegment(fs, next->fsindex);
-		/* 
+		/*
 		 * Check if the user used a selector != 0; if yes
 		 *  clear 64bit base, since overloaded base is always
 		 *  mapped to the Null selector
 		 */
 		if (fsindex)
-			prev->fs = 0;				
+			prev->fs = 0;
 	}
 	/* when next process has a 64bit base use it */
 	if (next->fs)
@@ -608,7 +611,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (unlikely(gsindex | next->gsindex | prev->gs)) {
 		load_gs_index(next->gsindex);
 		if (gsindex)
-			prev->gs = 0;				
+			prev->gs = 0;
 	}
 	if (next->gs)
 		wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
@@ -617,12 +620,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/* Must be after DS reload */
 	unlazy_fpu(prev_p);
 
-	/* 
+	/*
 	 * Switch the PDA and FPU contexts.
 	 */
 	prev->usersp = read_pda(oldrsp);
 	write_pda(oldrsp, next->usersp);
-	write_pda(pcurrent, next_p); 
+	write_pda(pcurrent, next_p);
 
 	write_pda(kernelstack,
 		  (unsigned long)task_stack_page(next_p) +
@@ -663,7 +666,7 @@ long sys_execve(char __user *name, char __user * __user *argv,
 		char __user * __user *envp, struct pt_regs *regs)
 {
 	long error;
-	char * filename;
+	char *filename;
 
 	filename = getname(name);
 	error = PTR_ERR(filename);
@@ -721,55 +724,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs)
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long stack;
-	u64 fp,ip;
+	u64 fp, ip;
 	int count = 0;
 
-	if (!p || p == current || p->state==TASK_RUNNING)
-		return 0; 
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
 	stack = (unsigned long)task_stack_page(p);
 	if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
 		return 0;
 	fp = *(u64 *)(p->thread.sp);
-	do { 
+	do {
 		if (fp < (unsigned long)stack ||
 		if (fp < (unsigned long)stack ||
 		    fp > (unsigned long)stack+THREAD_SIZE)
 		    fp > (unsigned long)stack+THREAD_SIZE)
-			return 0; 
+			return 0;
 		ip = *(u64 *)(fp+8);
 		if (!in_sched_functions(ip))
 			return ip;
-		fp = *(u64 *)fp; 
-	} while (count++ < 16); 
+		fp = *(u64 *)fp;
+	} while (count++ < 16);
 	return 0;
 }
 
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
-{ 
-	int ret = 0; 
+{
+	int ret = 0;
 	int doit = task == current;
 	int cpu;
 
-	switch (code) { 
+	switch (code) {
 	case ARCH_SET_GS:
 		if (addr >= TASK_SIZE_OF(task))
-			return -EPERM; 
+			return -EPERM;
 		cpu = get_cpu();
-		/* handle small bases via the GDT because that's faster to 
+		/* handle small bases via the GDT because that's faster to
 		   switch. */
-		if (addr <= 0xffffffff) {  
-			set_32bit_tls(task, GS_TLS, addr); 
-			if (doit) { 
+		if (addr <= 0xffffffff) {
+			set_32bit_tls(task, GS_TLS, addr);
+			if (doit) {
 				load_TLS(&task->thread, cpu);
 				load_TLS(&task->thread, cpu);
-				load_gs_index(GS_TLS_SEL); 
+				load_gs_index(GS_TLS_SEL);
 			}
-			task->thread.gsindex = GS_TLS_SEL; 
+			task->thread.gsindex = GS_TLS_SEL;
 			task->thread.gs = 0;
 			task->thread.gs = 0;
-		} else { 
+		} else {
 			task->thread.gsindex = 0;
 			task->thread.gsindex = 0;
 			task->thread.gs = addr;
 			task->thread.gs = addr;
 			if (doit) {
 			if (doit) {
 				load_gs_index(0);
 				load_gs_index(0);
 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
 				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
-			} 
+			}
 		}
 		put_cpu();
 		break;
@@ -823,8 +826,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 				rdmsrl(MSR_KERNEL_GS_BASE, base);
 			else
 				base = task->thread.gs;
-		}
-		else
+		} else
 			base = task->thread.gs;
 			base = task->thread.gs;
 		ret = put_user(base, (unsigned long __user *)addr);
 		ret = put_user(base, (unsigned long __user *)addr);
 		break;
 		break;
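The ARCH_SET_GS path in do_arch_prctl() above picks between two mechanisms: bases that fit in 32 bits go through a TLS descriptor in the GDT (cheaper across context switches), larger bases through the MSR_KERNEL_GS_BASE wrmsr. A condensed sketch of just that decision -- set_tls_desc() and write_gs_base_msr() are hypothetical stand-ins for set_32bit_tls()+load_gs_index() and checking_wrmsrl():

    #include <errno.h>

    static void set_tls_desc(unsigned long addr) { }	/* stand-in: GDT/TLS slot */
    static int write_gs_base_msr(unsigned long addr) { return 0; }	/* stand-in */

    static int set_gs_base_sketch(unsigned long addr, unsigned long task_size)
    {
    	if (addr >= task_size)
    		return -EPERM;			/* outside user space */
    	if (addr <= 0xffffffff) {
    		set_tls_desc(addr);		/* small base: cheap GDT switch */
    		return 0;
    	}
    	return write_gs_base_msr(addr);		/* large base: MSR path */
    }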

+ 7 - 29
arch/x86/kernel/ptrace.c

@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
 #include <linux/regset.h>
+#include <linux/tracehook.h>
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
@@ -69,7 +70,7 @@ static inline bool invalid_selector(u16 value)
 
 #define FLAG_MASK		FLAG_MASK_32
 
-static long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
+static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
 {
 	BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
 	regno >>= 2;
@@ -1375,30 +1376,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
 	force_sig_info(SIGTRAP, &info, tsk);
 }
 
-static void syscall_trace(struct pt_regs *regs)
-{
-	if (!(current->ptrace & PT_PTRACED))
-		return;
-
-#if 0
-	printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n",
-	       current->comm,
-	       regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0),
-	       current_thread_info()->flags, current->ptrace);
-#endif
-
-	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
-				? 0x80 : 0));
-	/*
-	 * this isn't the same as continuing with a signal, but it will do
-	 * for normal use.  strace only continues with a signal if the
-	 * stopping signal is not SIGTRAP.  -brl
-	 */
-	if (current->exit_code) {
-		send_sig(current->exit_code, current, 1);
-		current->exit_code = 0;
-	}
-}
 
 #ifdef CONFIG_X86_32
 # define IS_IA32	1
@@ -1432,8 +1409,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
 	if (unlikely(test_thread_flag(TIF_SYSCALL_EMU)))
 		ret = -1L;
 
-	if (ret || test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+	if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) &&
+	    tracehook_report_syscall_entry(regs))
+		ret = -1L;
 
 	if (unlikely(current->audit_context)) {
 		if (IS_IA32)
@@ -1459,7 +1437,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 		audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
 
 	if (test_thread_flag(TIF_SYSCALL_TRACE))
-		syscall_trace(regs);
+		tracehook_report_syscall_exit(regs, 0);
 
 	/*
 	 * If TIF_SYSCALL_EMU is set, we only get here because of
@@ -1475,6 +1453,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
 	 * system call instruction.
 	 */
 	if (test_thread_flag(TIF_SINGLESTEP) &&
-	    (current->ptrace & PT_PTRACED))
+	    tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
 		send_sigtrap(current, regs, 0);
 }
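The net effect of the conversion above: the private syscall_trace() is replaced by the generic tracehook entry/exit reports, and a nonzero return from the entry report now vetoes the syscall exactly like TIF_SYSCALL_EMU does. A condensed sketch of that decision (the emu/trace flags are passed in here instead of being read from thread flags):

    #include <linux/tracehook.h>

    static long syscall_enter_sketch(struct pt_regs *regs, int emu, int trace)
    {
    	long ret = 0;

    	if (emu)		/* TIF_SYSCALL_EMU: never run the syscall */
    		ret = -1L;

    	/* report entry; a nonzero return from the tracer skips the syscall */
    	if ((ret || trace) && tracehook_report_syscall_entry(regs))
    		ret = -1L;

    	return ret;		/* -1L: do not execute the syscall */
    }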

+ 2 - 0
arch/x86/kernel/setup.c

@@ -742,6 +742,8 @@ void __init setup_arch(char **cmdline_p)
 #else
 	num_physpages = max_pfn;
 
+	if (cpu_has_x2apic)
+		check_x2apic();
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* need this before calling reserve_initrd */

+ 17 - 2
arch/x86/kernel/sigframe.h

@@ -3,9 +3,18 @@ struct sigframe {
 	char __user *pretcode;
 	int sig;
 	struct sigcontext sc;
-	struct _fpstate fpstate;
+	/*
+	 * fpstate is unused. fpstate is moved/allocated after
+	 * retcode[] below. This movement allows the FP state and the
+	 * future state extensions (xsave) to stay together.
+	 * At the same time, retaining the unused fpstate keeps the offset
+	 * of extramask[] in the sigframe unchanged, so any legacy
+	 * application accessing/modifying it keeps working.
+	 */
+	struct _fpstate fpstate_unused;
 	unsigned long extramask[_NSIG_WORDS-1];
 	char retcode[8];
+	/* fp state follows here */
 };
 
 struct rt_sigframe {
@@ -15,13 +24,19 @@ struct rt_sigframe {
 	void __user *puc;
 	struct siginfo info;
 	struct ucontext uc;
-	struct _fpstate fpstate;
 	char retcode[8];
+	/* fp state follows here */
 };
 #else
 struct rt_sigframe {
 	char __user *pretcode;
 	struct ucontext uc;
 	struct siginfo info;
+	/* fp state follows here */
 };
+
+int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+		sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct k_sigaction *ka,
+		sigset_t *set, struct pt_regs *regs);
 #endif
 #endif
+ 109 - 82
arch/x86/kernel/signal_32.c

@@ -17,6 +17,7 @@
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/wait.h>
+#include <linux/tracehook.h>
 #include <linux/elf.h>
 #include <linux/elf.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
@@ -26,6 +27,8 @@
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/i387.h>
 #include <asm/vdso.h>
 #include <asm/vdso.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 
 
 #include "sigframe.h"
 #include "sigframe.h"
 
 
@@ -159,28 +162,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	}
 	}
 
 	{
+		void __user *buf;
 
 
 		err |= __get_user(buf, &sc->fpstate);
 		err |= __get_user(buf, &sc->fpstate);
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
-		} else {
-			struct task_struct *me = current;
-
-			if (used_math()) {
-				clear_fpu(me);
-				clear_used_math();
-			}
-		}
+		err |= restore_i387_xstate(buf);
 	}
 	}
 
 
 	err |= __get_user(*pax, &sc->ax);
 	err |= __get_user(*pax, &sc->ax);
 	return err;
 	return err;
-
-badframe:
-	return 1;
 }
 }
 
 asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -226,9 +215,8 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
 	return 0;
 }
 
-asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+static long do_rt_sigreturn(struct pt_regs *regs)
 {
-	struct pt_regs *regs = (struct pt_regs *)&__unused;
 	struct rt_sigframe __user *frame;
 	unsigned long ax;
 	sigset_t set;
@@ -254,15 +242,22 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
 	return ax;
 
 badframe:
-	force_sig(SIGSEGV, current);
+	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
 }
 
+asmlinkage int sys_rt_sigreturn(unsigned long __unused)
+{
+	struct pt_regs *regs = (struct pt_regs *)&__unused;
+
+	return do_rt_sigreturn(regs);
+}
+
 /*
  * Set up a signal frame.
  */
 static int
-setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
+setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 		 struct pt_regs *regs, unsigned long mask)
 {
 	int tmp, err = 0;
@@ -289,7 +284,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
 	err |= __put_user(regs->sp, &sc->sp_at_signal);
 	err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
 
-	tmp = save_i387(fpstate);
+	tmp = save_i387_xstate(fpstate);
 	if (tmp < 0)
 	if (tmp < 0)
 		err = 1;
 		err = 1;
 	else
 	else
@@ -306,7 +301,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
  * Determine which stack to use..
  */
 static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
+	     void **fpstate)
 {
 	unsigned long sp;
 
@@ -332,6 +328,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 			sp = (unsigned long) ka->sa.sa_restorer;
 	}
 
+	if (used_math()) {
+		sp = sp - sig_xstate_size;
+		*fpstate = (struct _fpstate *) sp;
+	}
+
 	sp -= frame_size;
 	/*
 	 * Align the stack pointer according to the i386 ABI,
@@ -343,38 +344,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
 }
 
 static int
-setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
-	    struct pt_regs *regs)
+__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+	      struct pt_regs *regs)
 {
 	struct sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
-	int usig;
+	void __user *fpstate = NULL;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
-
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
+		return -EFAULT;
 
-	err = __put_user(usig, &frame->sig);
-	if (err)
-		goto give_sigsegv;
+	if (__put_user(sig, &frame->sig))
+		return -EFAULT;
 
-	err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
-	if (err)
-		goto give_sigsegv;
+	if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+		return -EFAULT;
 
 	if (_NSIG_WORDS > 1) {
-		err = __copy_to_user(&frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-		if (err)
-			goto give_sigsegv;
+		if (__copy_to_user(&frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			return -EFAULT;
 	}
 
 	if (current->mm->context.vdso)
@@ -399,7 +391,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long)frame;
@@ -414,50 +406,43 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	regs->cs = __USER_CS;
 	regs->cs = __USER_CS;
 
 
 	return 0;
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			  sigset_t *set, struct pt_regs *regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
 	void __user *restorer;
 	int err = 0;
-	int usig;
+	void __user *fpstate = NULL;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
-	usig = current_thread_info()->exec_domain
-		&& current_thread_info()->exec_domain->signal_invmap
-		&& sig < 32
-		? current_thread_info()->exec_domain->signal_invmap[sig]
-		: sig;
-
-	err |= __put_user(usig, &frame->sig);
+	err |= __put_user(sig, &frame->sig);
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= copy_siginfo_to_user(&frame->info, info);
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				regs, set->sig[0]);
 				regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up to return from userspace.  */
 	restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -477,12 +462,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->sp = (unsigned long)frame;
 	regs->ip = (unsigned long)ka->sa.sa_handler;
-	regs->ax = (unsigned long)usig;
+	regs->ax = (unsigned long)sig;
 	regs->dx = (unsigned long)&frame->info;
 	regs->dx = (unsigned long)&frame->info;
 	regs->cx = (unsigned long)&frame->uc;
 	regs->cx = (unsigned long)&frame->uc;
 
 
@@ -492,15 +477,38 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->cs = __USER_CS;
 	regs->cs = __USER_CS;
 
 
 	return 0;
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 /*
  * OK, we're invoking a handler:
  */
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
+{
+	int ret;
+	int usig;
+
+	usig = current_thread_info()->exec_domain
+		&& current_thread_info()->exec_domain->signal_invmap
+		&& sig < 32
+		? current_thread_info()->exec_domain->signal_invmap[sig]
+		: sig;
+
+	/* Set up the stack frame */
+	if (ka->sa.sa_flags & SA_SIGINFO)
+		ret = __setup_rt_frame(usig, ka, info, set, regs);
+	else
+		ret = __setup_frame(usig, ka, set, regs);
+
+	if (ret) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+
+	return ret;
+}
+
 static int
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	      sigset_t *oldset, struct pt_regs *regs)
 	int ret;
 	int ret;
 
 	/* Are we from a system call? */
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
 		/* If so, check system call restarting.. */
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		ret = setup_rt_frame(sig, ka, info, oldset, regs);
-	else
-		ret = setup_frame(sig, ka, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
 	if (ret)
 		return ret;
@@ -558,8 +562,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 * handler too.
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
@@ -568,9 +570,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
 	return 0;
 }
 
+#define NR_restart_syscall	__NR_restart_syscall
 /*
  * Note that 'init' is a special process: it doesn't get signals it doesn't
  * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -623,9 +629,9 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if ((long)regs->orig_ax >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
 		/* Restart the system call - no handlers present */
-		switch (regs->ax) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
 		case -ERESTARTNOINTR:
@@ -634,7 +640,7 @@ static void do_signal(struct pt_regs *regs)
 			break;
 			break;
 
 
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTART_RESTARTBLOCK:
-			regs->ax = __NR_restart_syscall;
+			regs->ax = NR_restart_syscall;
 			regs->ip -= 2;
 			regs->ip -= 2;
 			break;
 			break;
 		}
 		}
@@ -661,5 +667,26 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_SIGPENDING)
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
 		do_signal(regs);
 
 
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
+
 	clear_thread_flag(TIF_IRET);
 	clear_thread_flag(TIF_IRET);
 }
 }
+
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+{
+	struct task_struct *me = current;
+
+	if (show_unhandled_signals && printk_ratelimit()) {
+		printk(KERN_INFO
+		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       me->comm, me->pid, where, frame,
+		       regs->ip, regs->sp, regs->orig_ax);
+		print_vma_addr(" in ", regs->ip);
+		printk(KERN_CONT "\n");
+	}
+
+	force_sig(SIGSEGV, me);
+}
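
For context, the user-space side of this contract is a plain sigaction() registration: the SA_SIGINFO flag is what sends delivery through __setup_rt_frame() above, so the handler receives the siginfo_t and ucontext_t that the kernel copied onto the user stack. A minimal sketch, illustrative demo code only, not from this commit:

#include <signal.h>
#include <string.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *info, void *ucontext)
{
	/* keep the handler async-signal-safe: write() only */
	static const char msg[] = "caught SIGUSR1 via the rt frame\n";

	(void)sig; (void)info; (void)ucontext;
	write(STDOUT_FILENO, msg, sizeof(msg) - 1);
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;	/* request siginfo_t + ucontext_t delivery */
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR1, &sa, NULL) < 0)
		return 1;

	raise(SIGUSR1);
	return 0;
}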

+ 114 - 194
arch/x86/kernel/signal_64.c

@@ -15,17 +15,21 @@
 #include <linux/errno.h>
 #include <linux/wait.h>
 #include <linux/ptrace.h>
+#include <linux/tracehook.h>
 #include <linux/unistd.h>
 #include <linux/stddef.h>
 #include <linux/personality.h>
 #include <linux/compiler.h>
+#include <linux/uaccess.h>
+
 #include <asm/processor.h>
 #include <asm/ucontext.h>
-#include <asm/uaccess.h>
 #include <asm/i387.h>
 #include <asm/proto.h>
 #include <asm/ia32_unistd.h>
 #include <asm/mce.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
 #include "sigframe.h"
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -41,11 +45,6 @@
 # define FIX_EFLAGS	__FIX_EFLAGS
 #endif
 
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-               sigset_t *set, struct pt_regs * regs); 
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
-            sigset_t *set, struct pt_regs * regs); 
-
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
@@ -53,69 +52,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 	return do_sigaltstack(uss, uoss, regs->sp);
 }
 
-/*
- * Signal frame handlers.
- */
-
-static inline int save_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err = 0;
-
-	BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
-			sizeof(tsk->thread.xstate->fxsave));
-
-	if ((unsigned long)buf % 16)
-		printk("save_i387: bad fpstate %p\n", buf);
-
-	if (!used_math())
-		return 0;
-	clear_used_math(); /* trigger finit */
-	if (task_thread_info(tsk)->status & TS_USEDFPU) {
-		err = save_i387_checking((struct i387_fxsave_struct __user *)
-					 buf);
-		if (err)
-			return err;
-		task_thread_info(tsk)->status &= ~TS_USEDFPU;
-		stts();
-	} else {
-		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
-				   sizeof(struct i387_fxsave_struct)))
-			return -1;
-	}
-	return 1;
-}
-
-/*
- * This restores directly out of user space. Exceptions are handled.
- */
-static inline int restore_i387(struct _fpstate __user *buf)
-{
-	struct task_struct *tsk = current;
-	int err;
-
-	if (!used_math()) {
-		err = init_fpu(tsk);
-		if (err)
-			return err;
-	}
-
-	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
-		clts();
-		task_thread_info(current)->status |= TS_USEDFPU;
-	}
-	err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
-	if (unlikely(err)) {
-		/*
-		 * Encountered an error while doing the restore from the
-		 * user buffer, clear the fpu state.
-		 */
-		clear_fpu(tsk);
-		clear_used_math();
-	}
-	return err;
-}
-
 /*
  * Do a signal return; undo the signal stack.
  */
@@ -128,7 +64,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
-#define COPY(x)		err |= __get_user(regs->x, &sc->x)
+#define COPY(x)		(err |= __get_user(regs->x, &sc->x))
 
 	COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
 	COPY(dx); COPY(cx); COPY(ip);
@@ -158,34 +94,21 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 	}
 
 	{
-		struct _fpstate __user * buf;
-		err |= __get_user(buf, &sc->fpstate);
+		void __user *buf;
 
-		if (buf) {
-			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
-				goto badframe;
-			err |= restore_i387(buf);
-		} else {
-			struct task_struct *me = current;
-			if (used_math()) {
-				clear_fpu(me);
-				clear_used_math();
-			}
-		}
+		err |= __get_user(buf, &sc->fpstate);
+		err |= restore_i387_xstate(buf);
 	}
 
 	err |= __get_user(*pax, &sc->ax);
 	return err;
-
-badframe:
-	return 1;
 }
 
-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+static long do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	sigset_t set;
 	unsigned long ax;
+	sigset_t set;
 
 	frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
 	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -198,7 +121,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	current->blocked = set;
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
-	
+
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
 		goto badframe;
 
@@ -208,16 +131,22 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	return ax;
 
 badframe:
-	signal_fault(regs,frame,"sigreturn");
+	signal_fault(regs, frame, "rt_sigreturn");
 	return 0;
-}	
+}
+
+asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
+{
+	return do_rt_sigreturn(regs);
+}
 
 /*
  * Set up a signal frame.
  */
 
 static inline int
-setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
+setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+		unsigned long mask, struct task_struct *me)
 {
 	int err = 0;
 
@@ -269,41 +198,40 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 			sp = current->sas_ss_sp + current->sas_ss_size;
 	}
 
-	return (void __user *)round_down(sp - size, 16);
+	return (void __user *)round_down(sp - size, 64);
 }
 
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			   sigset_t *set, struct pt_regs * regs)
+static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+			    sigset_t *set, struct pt_regs *regs)
 {
 	struct rt_sigframe __user *frame;
-	struct _fpstate __user *fp = NULL; 
+	void __user *fp = NULL;
 	int err = 0;
 	struct task_struct *me = current;
 
 	if (used_math()) {
-		fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
+		fp = get_stack(ka, regs, sig_xstate_size);
 		frame = (void __user *)round_down(
 			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
 
-		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
-			goto give_sigsegv;
-
-		if (save_i387(fp) < 0) 
-			err |= -1; 
+		if (save_i387_xstate(fp) < 0)
+			return -EFAULT;
 	} else
 		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
-	if (ka->sa.sa_flags & SA_SIGINFO) { 
-		err |= copy_siginfo_to_user(&frame->info, info);
-		if (err)
-			goto give_sigsegv;
+	if (ka->sa.sa_flags & SA_SIGINFO) {
+		if (copy_siginfo_to_user(&frame->info, info))
+			return -EFAULT;
 	}
-		
+
 	/* Create the ucontext.  */
-	err |= __put_user(0, &frame->uc.uc_flags);
+	if (cpu_has_xsave)
+		err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
+	else
+		err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->sp),
@@ -311,9 +239,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me);
 	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
-	if (sizeof(*set) == 16) { 
+	if (sizeof(*set) == 16) {
 		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
-		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 
+		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]);
 	} else
 		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
@@ -324,15 +252,15 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
 	} else {
 		/* could use a vstub here */
-		goto give_sigsegv; 
+		return -EFAULT;
 	}
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* Set up registers for signal handler */
 	regs->di = sig;
-	/* In case the signal handler was declared without prototypes */ 
+	/* In case the signal handler was declared without prototypes */
 	regs->ax = 0;
 
 	/* This also works for non SA_SIGINFO handlers because they expect the
@@ -348,44 +276,34 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->cs = __USER_CS;
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 /*
- * Return -1L or the syscall number that @regs is executing.
+ * OK, we're invoking a handler
 */
-static long current_syscall(struct pt_regs *regs)
+static int
+setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+	       sigset_t *set, struct pt_regs *regs)
 {
-	/*
-	 * We always sign-extend a -1 value being set here,
-	 * so this is always either -1L or a syscall number.
-	 */
-	return regs->orig_ax;
-}
+	int ret;
 
-/*
- * Return a value that is -EFOO if the system call in @regs->orig_ax
- * returned an error.  This only works for @regs from @current.
- */
-static long current_syscall_ret(struct pt_regs *regs)
-{
 #ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32))
-		/*
-		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
-		 * and will match correctly in comparisons.
-		 */
-		return (int) regs->ax;
+	if (test_thread_flag(TIF_IA32)) {
+		if (ka->sa.sa_flags & SA_SIGINFO)
+			ret = ia32_setup_rt_frame(sig, ka, info, set, regs);
+		else
+			ret = ia32_setup_frame(sig, ka, set, regs);
+	} else
 #endif
-	return regs->ax;
-}
+	ret = __setup_rt_frame(sig, ka, info, set, regs);
 
-/*
- * OK, we're invoking a handler
- */	
+	if (ret) {
+		force_sigsegv(sig, current);
+		return -EFAULT;
+	}
+
+	return ret;
+}
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@@ -394,9 +312,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	int ret;
 
 	/* Are we from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTART_RESTARTBLOCK:
 		case -ERESTARTNOHAND:
 			regs->ax = -EINTR;
@@ -423,50 +341,46 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-#ifdef CONFIG_IA32_EMULATION
-	if (test_thread_flag(TIF_IA32)) {
-		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
-		else
-			ret = ia32_setup_frame(sig, ka, oldset, regs);
-	} else 
-#endif
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
-	if (ret == 0) {
-		/*
-		 * This has nothing to do with segment registers,
-		 * despite the name.  This magic affects uaccess.h
-		 * macros' behavior.  Reset it to the normal setting.
-		 */
-		set_fs(USER_DS);
+	if (ret)
+		return ret;
 
-		/*
-		 * Clear the direction flag as per the ABI for function entry.
-		 */
-		regs->flags &= ~X86_EFLAGS_DF;
+	/*
+	 * This has nothing to do with segment registers,
+	 * despite the name.  This magic affects uaccess.h
+	 * macros' behavior.  Reset it to the normal setting.
+	 */
+	set_fs(USER_DS);
 
-		/*
-		 * Clear TF when entering the signal handler, but
-		 * notify any tracer that was single-stepping it.
-		 * The tracer may want to single-step inside the
-		 * handler too.
-		 */
-		regs->flags &= ~X86_EFLAGS_TF;
-		if (test_thread_flag(TIF_SINGLESTEP))
-			ptrace_notify(SIGTRAP);
-
-		spin_lock_irq(&current->sighand->siglock);
-		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
-		if (!(ka->sa.sa_flags & SA_NODEFER))
-			sigaddset(&current->blocked,sig);
-		recalc_sigpending();
-		spin_unlock_irq(&current->sighand->siglock);
-	}
+	/*
+	 * Clear the direction flag as per the ABI for function entry.
+	 */
+	regs->flags &= ~X86_EFLAGS_DF;
 
-	return ret;
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->flags &= ~X86_EFLAGS_TF;
+
+	spin_lock_irq(&current->sighand->siglock);
+	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+	if (!(ka->sa.sa_flags & SA_NODEFER))
+		sigaddset(&current->blocked, sig);
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+
+	tracehook_signal_handler(sig, info, ka, regs,
+				 test_thread_flag(TIF_SINGLESTEP));
+
+	return 0;
 }
 
+#define NR_restart_syscall	\
+	test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
 /*
 * Note that 'init' is a special process: it doesn't get signals it doesn't
 * want to handle. Thus you cannot kill init even with a SIGKILL even by
@@ -518,9 +432,9 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* Did we come from a system call? */
-	if (current_syscall(regs) >= 0) {
+	if (syscall_get_nr(current, regs) >= 0) {
 		/* Restart the system call - no handlers present */
-		switch (current_syscall_ret(regs)) {
+		switch (syscall_get_error(current, regs)) {
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
@@ -528,9 +442,7 @@ static void do_signal(struct pt_regs *regs)
 			regs->ip -= 2;
 			break;
 		case -ERESTART_RESTARTBLOCK:
-			regs->ax = test_thread_flag(TIF_IA32) ?
-					__NR_ia32_restart_syscall :
-					__NR_restart_syscall;
+			regs->ax = NR_restart_syscall;
 			regs->ip -= 2;
 			break;
 		}
@@ -558,17 +470,25 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
 	/* deal with pending signal delivery */
 	if (thread_info_flags & _TIF_SIGPENDING)
 		do_signal(regs);
+
+	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+		clear_thread_flag(TIF_NOTIFY_RESUME);
+		tracehook_notify_resume(regs);
+	}
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-{ 
-	struct task_struct *me = current; 
+{
+	struct task_struct *me = current;
+
 	if (show_unhandled_signals && printk_ratelimit()) {
-		printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
-	       me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax);
+		printk(KERN_INFO
+		       "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
+		       me->comm, me->pid, where, frame,
+		       regs->ip, regs->sp, regs->orig_ax);
 		print_vma_addr(" in ", regs->ip);
-		printk("\n");
+		printk(KERN_CONT "\n");
 	}
 
-	force_sig(SIGSEGV, me); 
-} 
+	force_sig(SIGSEGV, me);
+}
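
get_stack() above honors SA_ONSTACK by placing the frame on the sigaltstack() area, and do_rt_sigreturn() later unwinds it. A minimal user-space sketch of that alternate-stack path, illustrative only and not from this commit:

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void on_altstack(int sig)
{
	int probe;

	/* &probe lives on the alternate stack; printf is tolerable in
	 * this synchronous toy, but not in real handlers */
	printf("signal %d handled near %p\n", sig, (void *)&probe);
}

int main(void)
{
	stack_t ss;
	struct sigaction sa;

	ss.ss_sp = malloc(SIGSTKSZ);
	ss.ss_size = SIGSTKSZ;
	ss.ss_flags = 0;
	if (!ss.ss_sp || sigaltstack(&ss, NULL) < 0)
		return 1;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_altstack;
	sa.sa_flags = SA_ONSTACK;	/* deliver on the sigaltstack() area */
	sigemptyset(&sa.sa_mask);
	sigaction(SIGUSR1, &sa, NULL);

	raise(SIGUSR1);
	free(ss.ss_sp);
	return 0;
}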

+ 22 - 25
arch/x86/kernel/smpboot.c

@@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
 #define get_idle_for_cpu(x)      (per_cpu(idle_thread_array, x))
 #define set_idle_for_cpu(x, p)   (per_cpu(idle_thread_array, x) = (p))
 #else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
+static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)      (idle_thread_array[(x)])
 #define set_idle_for_cpu(x, p)   (idle_thread_array[(x)] = (p))
 #endif
@@ -123,13 +123,12 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 static atomic_t init_deasserted;
 
-static int boot_cpu_logical_apicid;
 
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
 /* Set if we find a B stepping CPU */
-int __cpuinitdata smp_b_stepping;
+static int __cpuinitdata smp_b_stepping;
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
 
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
 #endif
 
 #ifdef CONFIG_X86_32
+static int boot_cpu_logical_apicid;
+
 u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
 					{ [0 ... NR_CPUS-1] = BAD_APICID };
 
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
 	/*
 	 * (This works even if the APIC is not enabled.)
 	 */
-	phys_id = GET_APIC_ID(read_apic_id());
+	phys_id = read_apic_id();
 	cpuid = smp_processor_id();
 	if (cpu_isset(cpuid, cpu_callin_map)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -550,8 +551,7 @@ static inline void __inquire_remote_apic(int apicid)
 			printk(KERN_CONT
 			       "a previous APIC delivery may have failed\n");
 
-		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
-		apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
+		apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
 
 		timeout = 0;
 		do {
@@ -583,11 +583,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
 	int maxlvt;
 
 	/* Target chip */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
-
 	/* Boot on the stack */
 	/* Kick the second */
-	apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
+	apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -640,13 +638,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	/*
 	 * Turn INIT on target chip
 	 */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 	/*
 	 * Send IPI
 	 */
-	apic_write(APIC_ICR,
-		   APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
+	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
+		       phys_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -656,10 +652,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 	pr_debug("Deasserting INIT.\n");
 
 	/* Target chip */
-	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 	/* Send IPI */
-	apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
 
 	pr_debug("Waiting for send to finish...\n");
 	send_status = safe_apic_wait_icr_idle();
@@ -702,11 +696,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
 		 */
 
 		/* Target chip */
-		apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
 		/* Boot on the stack */
 		/* Kick the second */
-		apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12));
+		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
+			       phys_apicid);
 
 		/*
 		 * Give the other CPU some time to accept the IPI.
@@ -1175,10 +1168,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
+#ifdef CONFIG_X86_32
 	boot_cpu_logical_apicid = logical_smp_processor_id();
+#endif
 	current_thread_info()->cpu = 0;  /* needed? */
 	set_cpu_sibling_map(0);
 
+#ifdef CONFIG_X86_64
+	enable_IR_x2apic();
+	setup_apic_routing();
+#endif
+
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
 		disable_smp();
@@ -1186,9 +1186,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	}
 
 	preempt_disable();
-	if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-		     GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
 	preempt_enable();
@@ -1313,16 +1313,13 @@ __init void prefill_possible_map(void)
 	if (!num_processors)
 		num_processors = 1;
 
-#ifdef CONFIG_HOTPLUG_CPU
 	if (additional_cpus == -1) {
 		if (disabled_cpus > 0)
 			additional_cpus = disabled_cpus;
 		else
 			additional_cpus = 0;
 	}
-#else
-	additional_cpus = 0;
-#endif
+
 	possible = num_processors + additional_cpus;
 	if (possible > NR_CPUS)
 		possible = NR_CPUS;
+ 1 - 1
arch/x86/kernel/summit_32.c

@@ -30,7 +30,7 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/bios_ebda.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/mpparse.h>
 
 static struct rio_table_hdr *rio_table_hdr __initdata;
 static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;

+ 2 - 0
arch/x86/kernel/sys_i386_32.c

@@ -22,6 +22,8 @@
 #include <linux/uaccess.h>
 #include <linux/unistd.h>
 
+#include <asm/syscalls.h>
+
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long prot, unsigned long flags,
 			  unsigned long fd, unsigned long pgoff)

+ 23 - 21
arch/x86/kernel/sys_x86_64.c

@@ -13,15 +13,17 @@
 #include <linux/utsname.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
 #include <asm/ia32.h>
+#include <asm/syscalls.h>
 
-asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
-	unsigned long fd, unsigned long off)
+asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
+		unsigned long prot, unsigned long flags,
+		unsigned long fd, unsigned long off)
 {
 	long error;
-	struct file * file;
+	struct file *file;
 
 	error = -EINVAL;
 	if (off & ~PAGE_MASK)
@@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 		   unmapped base down for this case. This can give
 		   conflicts with the heap, but we assume that glibc
 		   malloc knows how to fall back to mmap. Give it 1GB
-		   of playground for now. -AK */ 
-		*begin = 0x40000000; 
-		*end = 0x80000000;		
+		   of playground for now. -AK */
+		*begin = 0x40000000;
+		*end = 0x80000000;
 		if (current->flags & PF_RANDOMIZE) {
 			new_begin = randomize_range(*begin, *begin + 0x02000000, 0);
 			if (new_begin)
@@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 		}
 	} else {
 		*begin = TASK_UNMAPPED_BASE;
-		*end = TASK_SIZE; 
+		*end = TASK_SIZE;
 	}
-} 
+}
 
 unsigned long
 arch_get_unmapped_area(struct file *filp, unsigned long addr,
@@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 	unsigned long begin, end;
-	
+
 	if (flags & MAP_FIXED)
 		return addr;
 
-	find_start_end(flags, &begin, &end); 
+	find_start_end(flags, &begin, &end);
 
 	if (len > end)
 		return -ENOMEM;
@@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	}
 	if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
 	    && len <= mm->cached_hole_size) {
-	        mm->cached_hole_size = 0;
+		mm->cached_hole_size = 0;
 		mm->free_area_cache = begin;
 	}
 	addr = mm->free_area_cache;
-	if (addr < begin) 
-		addr = begin; 
+	if (addr < begin)
+		addr = begin;
 	start_addr = addr;
 
 full_search:
@@ -127,7 +129,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			return addr;
 		}
 		if (addr + mm->cached_hole_size < vma->vm_start)
-		        mm->cached_hole_size = vma->vm_start - addr;
+			mm->cached_hole_size = vma->vm_start - addr;
 
 		addr = vma->vm_end;
 	}
@@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		vma = find_vma(mm, addr-len);
 		if (!vma || addr <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr-len);
+			return mm->free_area_cache = addr-len;
 	}
 
 	if (mm->mmap_base < len)
@@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		vma = find_vma(mm, addr);
 		if (!vma || addr+len <= vma->vm_start)
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
+			return mm->free_area_cache = addr;
 
 		/* remember the largest hole we saw so far */
 		if (addr + mm->cached_hole_size < vma->vm_start)
@@ -224,13 +226,13 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 }
 
 
-asmlinkage long sys_uname(struct new_utsname __user * name)
+asmlinkage long sys_uname(struct new_utsname __user *name)
 {
 	int err;
 	down_read(&uts_sem);
-	err = copy_to_user(name, utsname(), sizeof (*name));
+	err = copy_to_user(name, utsname(), sizeof(*name));
 	up_read(&uts_sem);
-	if (personality(current->personality) == PER_LINUX32) 
-		err |= copy_to_user(&name->machine, "i686", 5); 		
+	if (personality(current->personality) == PER_LINUX32)
+		err |= copy_to_user(&name->machine, "i686", 5);
 	return err ? -EFAULT : 0;
 }
+ 2 - 2
arch/x86/kernel/syscall_64.c

@@ -8,12 +8,12 @@
 #define __NO_STUBS
 
 #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 #include <asm/unistd_64.h>
 
 #undef __SYSCALL
 #define __SYSCALL(nr, sym) [nr] = sym,
-#undef _ASM_X86_64_UNISTD_H_
+#undef ASM_X86__UNISTD_64_H
 
 typedef void (*sys_call_ptr_t)(void);
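
The double include of <asm/unistd_64.h> here is the classic X-macro trick: the same __SYSCALL(nr, sym) list expands once into declarations and once into designated initializers for the syscall table. A self-contained miniature of the pattern, with hypothetical stub names, for illustration only:

#include <stdio.h>

/* hypothetical stand-in for the __SYSCALL list in <asm/unistd_64.h> */
#define SYSCALL_LIST \
	__SYSCALL(0, stub_read) \
	__SYSCALL(1, stub_write)

/* first expansion: one function per entry (extern declarations in the kernel) */
#define __SYSCALL(nr, sym) static void sym(void) { puts(#sym); }
SYSCALL_LIST
#undef __SYSCALL

/* second expansion: designated initializers, as in sys_call_table */
#define __SYSCALL(nr, sym) [nr] = sym,
static void (*call_table[])(void) = {
	SYSCALL_LIST
};
#undef __SYSCALL

int main(void)
{
	call_table[0]();	/* prints "stub_read" */
	call_table[1]();	/* prints "stub_write" */
	return 0;
}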
 
 

+ 1 - 0
arch/x86/kernel/time_32.c

@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/time.h>
+#include <asm/timer.h>
 
 #include "do_timer.h"
 

+ 1 - 0
arch/x86/kernel/tls.c

@@ -10,6 +10,7 @@
 #include <asm/ldt.h>
 #include <asm/processor.h>
 #include <asm/proto.h>
+#include <asm/syscalls.h>
 
 #include "tls.h"
 

+ 0 - 1
arch/x86/kernel/traps_32.c

@@ -1228,7 +1228,6 @@ void __init trap_init(void)
 
 	set_bit(SYSCALL_VECTOR, used_vectors);
 
-	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */

+ 35 - 32
arch/x86/kernel/traps_64.c

@@ -32,6 +32,8 @@
 #include <linux/bug.h>
 #include <linux/nmi.h>
 #include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/io.h>
 
 #if defined(CONFIG_EDAC)
 #include <linux/edac.h>
@@ -45,9 +47,6 @@
 #include <asm/unwind.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
-#include <asm/nmi.h>
-#include <asm/smp.h>
-#include <asm/io.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #include <asm/pda.h>
@@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
+	printk(" [<%016lx>] %s%pS\n",
+			address, reliable ?	"" : "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 		[STACKFAULT_STACK - 1] = "#SS",
 		[MCE_STACK - 1] = "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
+		[N_EXCEPTION_STACKS ...
+			N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
 #endif
 	};
 	unsigned k;
@@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 }
 
 /*
- * x86-64 can have up to three kernel stacks: 
+ * x86-64 can have up to three kernel stacks:
 * process stack
 * interrupt stack
 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
@@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 		const struct stacktrace_ops *ops, void *data)
 {
 	const unsigned cpu = get_cpu();
-	unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
+	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
 	unsigned used = 0;
 	struct thread_info *tinfo;
 
@@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
 	if (!bp) {
 		if (task == current) {
 			/* Grab bp right from our regs */
-			asm("movq %%rbp, %0" : "=r" (bp) :);
+			asm("movq %%rbp, %0" : "=r" (bp) : );
 		} else {
 			/* bp is the last reg pushed by switch_to */
 			bp = *(unsigned long *) task->thread.sp;
@@ -357,11 +358,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	unsigned long *stack;
 	int i;
 	const int cpu = smp_processor_id();
-	unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
-	unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+	unsigned long *irqstack_end =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
+	unsigned long *irqstack =
+		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
 
-	// debugging aid: "show_stack(NULL, NULL);" prints the
-	// back trace for this cpu.
+	/*
+	 * debugging aid: "show_stack(NULL, NULL);" prints the
+	 * back trace for this cpu.
+	 */
 
 	if (sp == NULL) {
 		if (task)
@@ -404,7 +409,7 @@ void dump_stack(void)
 
 #ifdef CONFIG_FRAME_POINTER
 	if (!bp)
-		asm("movq %%rbp, %0" : "=r" (bp):);
+		asm("movq %%rbp, %0" : "=r" (bp) : );
 #endif
 
 	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
@@ -414,7 +419,6 @@ void dump_stack(void)
 		init_utsname()->version);
 	show_trace(NULL, NULL, &stack, bp);
 }
-
 EXPORT_SYMBOL(dump_stack);
 
 void show_registers(struct pt_regs *regs)
@@ -493,7 +497,7 @@ unsigned __kprobes long oops_begin(void)
 	raw_local_irq_save(flags);
 	cpu = smp_processor_id();
 	if (!__raw_spin_trylock(&die_lock)) {
-		if (cpu == die_owner) 
+		if (cpu == die_owner)
 			/* nested oops. should stop eventually */;
 		else
 			__raw_spin_lock(&die_lock);
@@ -638,7 +642,7 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 }
 
 #define DO_ERROR(trapnr, signr, str, name) \
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
 							== NOTIFY_STOP)	\
@@ -648,7 +652,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
-asmlinkage void do_##name(struct pt_regs * regs, long error_code)	\
+asmlinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
 	siginfo_t info;							\
 	info.si_signo = signr;						\
@@ -683,7 +687,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 	preempt_conditional_cli(regs);
 }
 
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
+asmlinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
@@ -778,9 +782,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 }
 
 static notrace __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
 {
-	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
+			NOTIFY_STOP)
 		return;
 	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
 		reason);
@@ -882,7 +887,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
 	/* Exception from kernel and interrupts are enabled. Move to
- 	   kernel process stack. */
+	   kernel process stack. */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -891,7 +896,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 }
 
 /* runs on IST stack. */
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+asmlinkage void __kprobes do_debug(struct pt_regs *regs,
 				   unsigned long error_code)
 {
 	struct task_struct *tsk = current;
@@ -1035,7 +1040,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
 
 asmlinkage void bad_intr(void)
 {
-	printk("bad interrupt"); 
+	printk("bad interrupt");
 }
 
 asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
@@ -1047,7 +1052,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 
 	conditional_sti(regs);
 	if (!user_mode(regs) &&
-        	kernel_math_error(regs, "kernel simd math error", 19))
+			kernel_math_error(regs, "kernel simd math error", 19))
 		return;
 
 	/*
@@ -1092,7 +1097,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
 	force_sig_info(SIGFPE, &info, task);
 }
 
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs)
 {
 }
 
@@ -1134,7 +1139,7 @@ asmlinkage void math_state_restore(void)
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
	 */
-	if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+	if (unlikely(restore_fpu_checking(me))) {
 		stts();
 		force_sig(SIGSEGV, me);
 		return;
@@ -1149,8 +1154,10 @@ void __init trap_init(void)
 	set_intr_gate(0, &divide_error);
 	set_intr_gate_ist(1, &debug, DEBUG_STACK);
 	set_intr_gate_ist(2, &nmi, NMI_STACK);
- 	set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
-	set_system_gate(4, &overflow); /* int4 can be called from all */
+	/* int3 can be called from all */
+	set_system_gate_ist(3, &int3, DEBUG_STACK);
+	/* int4 can be called from all */
+	set_system_gate(4, &overflow);
 	set_intr_gate(5, &bounds);
 	set_intr_gate(6, &invalid_op);
 	set_intr_gate(7, &device_not_available);
@@ -1172,10 +1179,6 @@ void __init trap_init(void)
 #ifdef CONFIG_IA32_EMULATION
 	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
-	/*
-	 * initialize the per thread extended state:
-	 */
-	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */

+ 1 - 15
arch/x86/kernel/visws_quirks.c

@@ -25,45 +25,31 @@
 #include <asm/visws/cobalt.h>
 #include <asm/visws/piix4.h>
 #include <asm/arch_hooks.h>
+#include <asm/io_apic.h>
 #include <asm/fixmap.h>
 #include <asm/reboot.h>
 #include <asm/setup.h>
 #include <asm/e820.h>
-#include <asm/smp.h>
 #include <asm/io.h>
 
 #include <mach_ipi.h>
 
 #include "mach_apic.h"
 
-#include <linux/init.h>
-#include <linux/smp.h>
-
 #include <linux/kernel_stat.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
 
-#include <asm/io.h>
-#include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/irq_vectors.h>
-#include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
-#include <asm/visws/piix4.h>
 
 #include <linux/sched.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 
 extern int no_broadcast;
 
-#include <asm/io.h>
 #include <asm/apic.h>
-#include <asm/arch_hooks.h>
-#include <asm/visws/cobalt.h>
-#include <asm/visws/lithium.h>
 
 char visws_board_type	= -1;
 char visws_board_rev	= -1;

+ 1 - 0
arch/x86/kernel/vm86_32.c

@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/tlbflush.h>
 #include <asm/irq.h>
+#include <asm/syscalls.h>
 
 /*
  * Known problems:

+ 2 - 2
arch/x86/kernel/vmi_32.c

@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	para_fill(pv_apic_ops.apic_read, APICRead);
-	para_fill(pv_apic_ops.apic_write, APICWrite);
+       para_fill(apic_ops->read, APICRead);
+       para_fill(apic_ops->write, APICWrite);
 #endif
 
 	/*

+ 316 - 0
arch/x86/kernel/xsave.c

@@ -0,0 +1,316 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/i387.h>
+#ifdef CONFIG_IA32_EMULATION
+#include <asm/sigcontext32.h>
+#endif
+#include <asm/xcr.h>
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+u64 pcntxt_mask;
+
+struct _fpx_sw_bytes fx_sw_reserved;
+#ifdef CONFIG_IA32_EMULATION
+struct _fpx_sw_bytes fx_sw_reserved_ia32;
+#endif
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+int check_for_xstate(struct i387_fxsave_struct __user *buf,
+		     void __user *fpstate,
+		     struct _fpx_sw_bytes *fx_sw_user)
+{
+	int min_xstate_size = sizeof(struct i387_fxsave_struct) +
+			      sizeof(struct xsave_hdr_struct);
+	unsigned int magic2;
+	int err;
+
+	err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
+			       sizeof(struct _fpx_sw_bytes));
+
+	if (err)
+		return err;
+
+	/*
+	 * First Magic check failed.
+	 */
+	if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
+		return -1;
+
+	/*
+	 * Check for error scenarios.
+	 */
+	if (fx_sw_user->xstate_size < min_xstate_size ||
+	    fx_sw_user->xstate_size > xstate_size ||
+	    fx_sw_user->xstate_size > fx_sw_user->extended_size)
+		return -1;
+
+	err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
+					    fx_sw_user->extended_size -
+					    FP_XSTATE_MAGIC2_SIZE));
+	/*
+	 * Check for the presence of second magic word at the end of memory
+	 * layout. This detects the case where the user just copied the legacy
+	 * fpstate layout with out copying the extended state information
+	 * in the memory layout.
+	 */
+	if (err || magic2 != FP_XSTATE_MAGIC2)
+		return -1;
+
+	return 0;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Signal frame handlers.
+ */
+
+int save_i387_xstate(void __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
+		return -EACCES;
+
+	BUG_ON(sig_xstate_size < xstate_size);
+
+	if ((unsigned long)buf % 64)
+		printk("save_i387_xstate: bad fpstate %p\n", buf);
+
+	if (!used_math())
+		return 0;
+	clear_used_math(); /* trigger finit */
+	if (task_thread_info(tsk)->status & TS_USEDFPU) {
+		/*
+	 	 * Start with clearing the user buffer. This will present a
+	 	 * clean context for the bytes not touched by the fxsave/xsave.
+		 */
+		__clear_user(buf, sig_xstate_size);
+
+		if (task_thread_info(tsk)->status & TS_XSAVE)
+			err = xsave_user(buf);
+		else
+			err = fxsave_user(buf);
+
+		if (err)
+			return err;
+		task_thread_info(tsk)->status &= ~TS_USEDFPU;
+		stts();
+	} else {
+		if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
+				   xstate_size))
+			return -1;
+	}
+
+	if (task_thread_info(tsk)->status & TS_XSAVE) {
+		struct _fpstate __user *fx = buf;
+
+		err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
+				     sizeof(struct _fpx_sw_bytes));
+
+		err |= __put_user(FP_XSTATE_MAGIC2,
+				  (__u32 __user *) (buf + sig_xstate_size
+						    - FP_XSTATE_MAGIC2_SIZE));
+	}
+
+	return 1;
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE
+ * state.
+ */
+int restore_user_xstate(void __user *buf)
+{
+	struct _fpx_sw_bytes fx_sw_user;
+	u64 mask;
+	int err;
+
+	if (((unsigned long)buf % 64) ||
+	     check_for_xstate(buf, buf, &fx_sw_user))
+		goto fx_only;
+
+	mask = fx_sw_user.xstate_bv;
+
+	/*
+	 * restore the state passed by the user.
+	 */
+	err = xrestore_user(buf, mask);
+	if (err)
+		return err;
+
+	/*
+	 * init the state skipped by the user.
+	 */
+	mask = pcntxt_mask & ~mask;
+
+	xrstor_state(init_xstate_buf, mask);
+
+	return 0;
+
+fx_only:
+	/*
+	 * couldn't find the extended state information in the
+	 * memory layout. Restore just the FP/SSE and init all
+	 * the other extended state.
+	 */
+	xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
+	return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
+}
+
+/*
+ * This restores directly out of user space. Exceptions are handled.
+ */
+int restore_i387_xstate(void __user *buf)
+{
+	struct task_struct *tsk = current;
+	int err = 0;
+
+	if (!buf) {
+		if (used_math())
+			goto clear;
+		return 0;
+	} else
+		if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
+			return -EACCES;
+
+	if (!used_math()) {
+		err = init_fpu(tsk);
+		if (err)
+			return err;
+	}
+
+	if (!(task_thread_info(current)->status & TS_USEDFPU)) {
+		clts();
+		task_thread_info(current)->status |= TS_USEDFPU;
+	}
+	if (task_thread_info(tsk)->status & TS_XSAVE)
+		err = restore_user_xstate(buf);
+	else
+		err = fxrstor_checking((__force struct i387_fxsave_struct *)
+				       buf);
+	if (unlikely(err)) {
+		/*
+		 * Encountered an error while doing the restore from the
+		 * user buffer, clear the fpu state.
+		 */
+clear:
+		clear_fpu(tsk);
+		clear_used_math();
+	}
+	return err;
+}
+#endif
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed by the fpstate pointer in the sigcontext.
+ * This will be saved when ever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
+ */
+void prepare_fx_sw_frame(void)
+{
+	int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
+			     FP_XSTATE_MAGIC2_SIZE;
+
+	sig_xstate_size = sizeof(struct _fpstate) + size_extended;
+
+#ifdef CONFIG_IA32_EMULATION
+	sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
+#endif
+
+	memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
+
+	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+	fx_sw_reserved.extended_size = sig_xstate_size;
+	fx_sw_reserved.xstate_bv = pcntxt_mask;
+	fx_sw_reserved.xstate_size = xstate_size;
+#ifdef CONFIG_IA32_EMULATION
+	memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
+	       sizeof(struct _fpx_sw_bytes));
+	fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
+#endif
+}
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+#ifdef CONFIG_X86_64
+unsigned int sig_xstate_size = sizeof(struct _fpstate);
+#endif
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+void __cpuinit xsave_init(void)
+{
+	if (!cpu_has_xsave)
+		return;
+
+	set_in_cr4(X86_CR4_OSXSAVE);
+
+	/*
+	 * Enable all the features that the HW is capable of
+	 * and the Linux kernel is aware of.
+	 */
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+}
+
+/*
+ * setup the xstate image representing the init state
+ */
+void setup_xstate_init(void)
+{
+	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ */
+void __init xsave_cntxt_init(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	pcntxt_mask = eax + ((u64)edx << 32);
+
+	if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+		printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
+		       pcntxt_mask);
+		BUG();
+	}
+
+	/*
+	 * for now OS knows only about FP/SSE
+	 */
+	pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
+	xsave_init();
+
+	/*
+	 * Recompute the context size for enabled features
+	 */
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+	xstate_size = ebx;
+
+	prepare_fx_sw_frame();
+
+	setup_xstate_init();
+
+	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
+	       "cntxt size 0x%x\n",
+	       pcntxt_mask, xstate_size);
+}
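
xsave_cntxt_init() above reads CPUID leaf 0xD, subleaf 0: EAX/EDX give the supported xstate feature mask and EBX the context size for the currently enabled features. The same leaf can be probed from user space; a sketch using GCC's <cpuid.h>, assuming an XSAVE-capable CPU:

#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint64_t xstate_bv;

	if (!__get_cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 0xd not available\n");
		return 1;
	}
	xstate_bv = eax | ((uint64_t)edx << 32);	/* supported feature mask */
	printf("xstate feature mask: %#llx\n", (unsigned long long)xstate_bv);
	printf("enabled context size: %u bytes\n", ebx);	/* like xstate_size */
	return 0;
}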

+ 34 - 4
arch/x86/lguest/boot.c

@@ -55,6 +55,7 @@
 #include <linux/lguest_launcher.h>
 #include <linux/virtio_console.h>
 #include <linux/pm.h>
+#include <asm/apic.h>
 #include <asm/lguest.h>
 #include <asm/paravirt.h>
 #include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
  * code qualifies for Advanced.  It will also never interrupt anything.  It
  * does, however, allow us to get through the Linux boot code. */
 #ifdef CONFIG_X86_LOCAL_APIC
-static void lguest_apic_write(unsigned long reg, u32 v)
+static void lguest_apic_write(u32 reg, u32 v)
 {
 }
 
-static u32 lguest_apic_read(unsigned long reg)
+static u32 lguest_apic_read(u32 reg)
 {
 	return 0;
 }
+
+static u64 lguest_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void lguest_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there are any stray references */
+	WARN_ON(1);
+}
+
+static void lguest_apic_wait_icr_idle(void)
+{
+	return;
+}
+
+static u32 lguest_apic_safe_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static struct apic_ops lguest_basic_apic_ops = {
+	.read = lguest_apic_read,
+	.write = lguest_apic_write,
+	.icr_read = lguest_apic_icr_read,
+	.icr_write = lguest_apic_icr_write,
+	.wait_icr_idle = lguest_apic_wait_icr_idle,
+	.safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+};
 #endif
 
 /* STOP!  Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* apic read/write intercepts */
-	pv_apic_ops.apic_write = lguest_apic_write;
-	pv_apic_ops.apic_read = lguest_apic_read;
+	apic_ops = &lguest_basic_apic_ops;
 #endif
 
 	/* time operations */
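
The hunk above replaces the old pv_apic_ops read/write hooks with a full
apic_ops operations table.  A guest with no real local APIC simply registers
no-op implementations: reads return 0, and stray ICR writes trip a warning.
Conceptually, the indirection looks like this (a sketch with hypothetical
names, not the kernel's actual declarations):

struct apic_ops {
	u32 (*read)(u32 reg);
	void (*write)(u32 reg, u32 v);
};

static u32 dummy_read(u32 reg)			{ return 0; }
static void dummy_write(u32 reg, u32 v)		{ }

static struct apic_ops dummy_apic_ops = {
	.read  = dummy_read,
	.write = dummy_write,
};

/* every apic_read(reg) call site then dispatches via apic_ops->read(reg) */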

+ 36 - 42
arch/x86/lib/msr-on-cpu.c

@@ -16,37 +16,46 @@ static void __rdmsr_on_cpu(void *info)
 	rdmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static void __rdmsr_safe_on_cpu(void *info)
+static void __wrmsr_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
+	wrmsr(rv->msr_no, rv->l, rv->h);
 }
 
-static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe)
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
-	if (safe) {
-		err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
-	}
+	err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
 	*l = rv.l;
 	*h = rv.h;
 
 	return err;
 }
 
-static void __wrmsr_on_cpu(void *info)
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+	int err;
+	struct msr_info rv;
+
+	rv.msr_no = msr_no;
+	rv.l = l;
+	rv.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+	return err;
+}
+
+/* These "safe" variants are slower and should be used when the target MSR
+   may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
 {
 	struct msr_info *rv = info;
 
-	wrmsr(rv->msr_no, rv->l, rv->h);
+	rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -56,45 +65,30 @@ static void __wrmsr_safe_on_cpu(void *info)
 	rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h);
 }
 
-static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe)
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
-	int err = 0;
+	int err;
 	struct msr_info rv;
 
 	rv.msr_no = msr_no;
-	rv.l = l;
-	rv.h = h;
-	if (safe) {
-		err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu,
-					       &rv, 1);
-		err = err ? err : rv.err;
-	} else {
-		err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-	}
-
-	return err;
-}
+	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+	*l = rv.l;
+	*h = rv.h;
 
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 0);
+	return err ? err : rv.err;
 }
 
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 0);
-}
-
-/* These "safe" variants are slower and should be used when the target MSR
-   may not actually exist. */
 int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
 {
-	return _wrmsr_on_cpu(cpu, msr_no, l, h, 1);
-}
+	int err;
+	struct msr_info rv;
 
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
-	return _rdmsr_on_cpu(cpu, msr_no, l, h, 1);
+	rv.msr_no = msr_no;
+	rv.l = l;
+	rv.h = h;
+	err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+	return err ? err : rv.err;
 }
 
 EXPORT_SYMBOL(rdmsr_on_cpu);
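
Typical use of the uninlined helpers (a sketch; MSR 0x10 is the TSC
counter, present on every CPU this file can run on):

	u32 lo, hi;

	if (!rdmsr_safe_on_cpu(2, 0x10, &lo, &hi))
		pr_info("TSC on cpu2: %#llx\n", ((u64)hi << 32) | lo);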

+ 21 - 21
arch/x86/lib/string_32.c

@@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src)
 		"testb %%al,%%al\n\t"
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2)
-		:"0" (src), "1" (dest) : "memory");
+		: "0" (src), "1" (dest) : "memory");
 	return dest;
 	return dest;
 }
 }
 EXPORT_SYMBOL(strcpy);
 EXPORT_SYMBOL(strcpy);
@@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count)
 		"stosb\n"
 		"stosb\n"
 		"2:"
 		"2:"
 		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
 		: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
-		:"0" (src), "1" (dest), "2" (count) : "memory");
+		: "0" (src), "1" (dest), "2" (count) : "memory");
 	return dest;
 	return dest;
 }
 }
 EXPORT_SYMBOL(strncpy);
 EXPORT_SYMBOL(strncpy);
@@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src)
 		"testb %%al,%%al\n\t"
 		"testb %%al,%%al\n\t"
 		"jne 1b"
 		"jne 1b"
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
 		: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
-		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
+		: "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory");
 	return dest;
 	return dest;
 }
 }
 EXPORT_SYMBOL(strcat);
 EXPORT_SYMBOL(strcat);
@@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct)
 		"2:\tsbbl %%eax,%%eax\n\t"
 		"2:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"orb $1,%%al\n"
 		"3:"
 		"3:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1)
-		:"1" (cs), "2" (ct)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1)
+		: "1" (cs), "2" (ct)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(strcmp);
 EXPORT_SYMBOL(strcmp);
@@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count)
 		"3:\tsbbl %%eax,%%eax\n\t"
 		"3:\tsbbl %%eax,%%eax\n\t"
 		"orb $1,%%al\n"
 		"orb $1,%%al\n"
 		"4:"
 		"4:"
-		:"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
-		:"1" (cs), "2" (ct), "3" (count)
-		:"memory");
+		: "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+		: "1" (cs), "2" (ct), "3" (count)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(strncmp);
 EXPORT_SYMBOL(strncmp);
@@ -152,9 +152,9 @@ char *strchr(const char *s, int c)
 		"movl $1,%1\n"
 		"movl $1,%1\n"
 		"2:\tmovl %1,%0\n\t"
 		"2:\tmovl %1,%0\n\t"
 		"decl %0"
 		"decl %0"
-		:"=a" (res), "=&S" (d0)
-		:"1" (s), "0" (c)
-		:"memory");
+		: "=a" (res), "=&S" (d0)
+		: "1" (s), "0" (c)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(strchr);
 EXPORT_SYMBOL(strchr);
@@ -169,9 +169,9 @@ size_t strlen(const char *s)
 		"scasb\n\t"
 		"scasb\n\t"
 		"notl %0\n\t"
 		"notl %0\n\t"
 		"decl %0"
 		"decl %0"
-		:"=c" (res), "=&D" (d0)
-		:"1" (s), "a" (0), "0" (0xffffffffu)
-		:"memory");
+		: "=c" (res), "=&D" (d0)
+		: "1" (s), "a" (0), "0" (0xffffffffu)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strlen);
@@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count)
 		"je 1f\n\t"
 		"je 1f\n\t"
 		"movl $1,%0\n"
 		"movl $1,%0\n"
 		"1:\tdecl %0"
 		"1:\tdecl %0"
-		:"=D" (res), "=&c" (d0)
-		:"a" (c), "0" (cs), "1" (count)
-		:"memory");
+		: "=D" (res), "=&c" (d0)
+		: "a" (c), "0" (cs), "1" (count)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(memchr);
 EXPORT_SYMBOL(memchr);
@@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count)
 		"cmpl $-1,%1\n\t"
 		"cmpl $-1,%1\n\t"
 		"jne 1b\n"
 		"jne 1b\n"
 		"3:\tsubl %2,%0"
 		"3:\tsubl %2,%0"
-		:"=a" (res), "=&d" (d0)
-		:"c" (s), "1" (count)
-		:"memory");
+		: "=a" (res), "=&d" (d0)
+		: "c" (s), "1" (count)
+		: "memory");
 	return res;
 	return res;
 }
 }
 EXPORT_SYMBOL(strnlen);
 EXPORT_SYMBOL(strnlen);
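
All of these helpers follow the same GCC extended-asm shape:
"instructions : outputs : inputs : clobbers", which is all this hunk is
normalizing.  For instance, a stand-alone 32-bit strlen written in that
style (an illustrative sketch of the routine patched above):

static inline size_t strlen_sketch(const char *s)
{
	size_t res;
	int d0;

	asm volatile("repne\n\t"
		     "scasb\n\t"
		     "notl %0\n\t"
		     "decl %0"
		     : "=c" (res), "=&D" (d0)
		     : "1" (s), "a" (0), "0" (0xffffffffu)
		     : "memory");
	return res;
}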

+ 3 - 3
arch/x86/lib/strstr_32.c

@@ -23,9 +23,9 @@ __asm__ __volatile__(
 	"jne 1b\n\t"
 	"jne 1b\n\t"
 	"xorl %%eax,%%eax\n\t"
 	"xorl %%eax,%%eax\n\t"
 	"2:"
 	"2:"
-	:"=a" (__res), "=&c" (d0), "=&S" (d1)
-	:"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
-	:"dx", "di");
+	: "=a" (__res), "=&c" (d0), "=&S" (d1)
+	: "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct)
+	: "dx", "di");
 return __res;
 return __res;
 }
 }
 
 

+ 7 - 0
arch/x86/lib/usercopy_32.c

@@ -14,6 +14,13 @@
 #include <asm/uaccess.h>
 #include <asm/mmx.h>
 
+#ifdef CONFIG_X86_INTEL_USERCOPY
+/*
+ * Alignment at which movsl is preferred for bulk memory copies.
+ */
+struct movsl_mask movsl_mask __read_mostly;
+#endif
+
 static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
 {
 #ifdef CONFIG_X86_INTEL_USERCOPY
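
movsl_mask feeds the alignment check in __movsl_is_ok(): on CPUs where a
misaligned "movsl" is slow, a large copy may only use it when both pointers
agree in their low bits.  The test reduces to roughly this (a sketch):

	/* mask is e.g. 7: require src and dst to share alignment mod 8 */
	if (n >= 64 && ((a1 ^ a2) & movsl_mask.mask))
		return 0;	/* fall back to the byte-wise copy path */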

+ 3 - 1
arch/x86/mach-default/setup.c

@@ -10,13 +10,15 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 
+#include <mach_ipi.h>
+
 #ifdef CONFIG_HOTPLUG_CPU
 #define DEFAULT_SEND_IPI	(1)
 #else
 #define DEFAULT_SEND_IPI	(0)
 #endif
 
-int no_broadcast=DEFAULT_SEND_IPI;
+int no_broadcast = DEFAULT_SEND_IPI;
 
 /**
  * pre_intr_init_hook - initialisation prior to setting up interrupt vectors

+ 1 - 1
arch/x86/mach-generic/Makefile

@@ -9,4 +9,4 @@ obj-$(CONFIG_X86_NUMAQ)		+= numaq.o
 obj-$(CONFIG_X86_SUMMIT)	+= summit.o
 obj-$(CONFIG_X86_BIGSMP)	+= bigsmp.o
 obj-$(CONFIG_X86_ES7000)	+= es7000.o
-obj-$(CONFIG_X86_ES7000)	+= ../../x86/mach-es7000/
+obj-$(CONFIG_X86_ES7000)	+= ../../x86/es7000/

+ 4 - 5
arch/x86/mach-generic/bigsmp.c

@@ -5,18 +5,17 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include <asm/mach-bigsmp/mach_apic.h>
-#include <asm/mach-bigsmp/mach_apicdef.h>
-#include <asm/mach-bigsmp/mach_ipi.h>
+#include <asm/bigsmp/apicdef.h>
+#include <linux/smp.h>
+#include <asm/bigsmp/apic.h>
+#include <asm/bigsmp/ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
 
 static int dmi_bigsmp; /* can be set by dmi scanners */

+ 6 - 7
arch/x86/mach-generic/es7000.c

@@ -4,20 +4,19 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-es7000/mach_apicdef.h>
-#include <asm/mach-es7000/mach_apic.h>
-#include <asm/mach-es7000/mach_ipi.h>
-#include <asm/mach-es7000/mach_mpparse.h>
-#include <asm/mach-es7000/mach_wakecpu.h>
+#include <asm/es7000/apicdef.h>
+#include <linux/smp.h>
+#include <asm/es7000/apic.h>
+#include <asm/es7000/ipi.h>
+#include <asm/es7000/mpparse.h>
+#include <asm/es7000/wakecpu.h>
 
 static int probe_es7000(void)
 {

+ 6 - 6
arch/x86/mach-generic/numaq.c

@@ -4,7 +4,6 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <linux/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
@@ -12,11 +11,12 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <asm/mach-numaq/mach_apic.h>
-#include <asm/mach-numaq/mach_apicdef.h>
-#include <asm/mach-numaq/mach_ipi.h>
-#include <asm/mach-numaq/mach_mpparse.h>
-#include <asm/mach-numaq/mach_wakecpu.h>
+#include <asm/numaq/apicdef.h>
+#include <linux/smp.h>
+#include <asm/numaq/apic.h>
+#include <asm/numaq/ipi.h>
+#include <asm/numaq/mpparse.h>
+#include <asm/numaq/wakecpu.h>
 #include <asm/numaq.h>
 
 static int mps_oem_check(struct mp_config_table *mpc, char *oem,

+ 5 - 6
arch/x86/mach-generic/summit.c

@@ -4,19 +4,18 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-summit/mach_apic.h>
-#include <asm/mach-summit/mach_apicdef.h>
-#include <asm/mach-summit/mach_ipi.h>
-#include <asm/mach-summit/mach_mpparse.h>
+#include <asm/summit/apicdef.h>
+#include <linux/smp.h>
+#include <asm/summit/apic.h>
+#include <asm/summit/ipi.h>
+#include <asm/summit/mpparse.h>
 
 static int probe_summit(void)
 {

+ 1 - 1
arch/x86/mm/discontig_32.c

@@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn,
 
 	get_memcfg_numa();
 
-	kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
+	kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
 	kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
 	do {

+ 2 - 2
arch/x86/mm/dump_pagetables.c

@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st,
 	 * we have now. "break" is either changing perms, levels or
 	 * address space marker.
 	 */
-	prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
-	cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
+	prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
+	cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
 
 	if (!st->level) {
 		/* First entry */

+ 1 - 2
arch/x86/mm/fault.c

@@ -35,6 +35,7 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm-generic/sections.h>
+#include <asm/traps.h>
 
 /*
  * Page fault error code bits
@@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address)
 	return 0;
 }
 
-void do_invalid_op(struct pt_regs *, unsigned long);
-
 static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_F00F_BUG

+ 1 - 0
arch/x86/mm/init_32.c

@@ -47,6 +47,7 @@
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/smp.h>
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 

+ 4 - 4
arch/x86/mm/init_64.c

@@ -225,7 +225,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
 void __init cleanup_highmap(void)
 {
 	unsigned long vaddr = __START_KERNEL_map;
-	unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
+	unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
 	pmd_t *pmd = level2_kernel_pgt;
 	pmd_t *last_pmd = pmd + PTRS_PER_PMD;
 
@@ -451,14 +451,14 @@ static void __init find_early_table_space(unsigned long end)
 	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
-	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+	tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
 	if (direct_gbpages) {
 		unsigned long extra;
 		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
 		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
 	} else
 		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+	tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
 
 	if (cpu_has_pse) {
 		unsigned long extra;
@@ -466,7 +466,7 @@ static void __init find_early_table_space(unsigned long end)
 		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	} else
 		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
+	tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
 
 	/*
 	 * RED-PEN putting page tables only on node 0 could

+ 2 - 2
arch/x86/mm/ioremap.c

@@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
 	return;
 }
 
-int __initdata early_ioremap_debug;
+static int __initdata early_ioremap_debug;
 
 static int __init early_ioremap_debug_setup(char *str)
 {
@@ -547,7 +547,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx)
 }
 
 
-int __initdata early_ioremap_nested;
+static int __initdata early_ioremap_nested;
 
 static int __init check_early_ioremap_leak(void)
 {

+ 5 - 5
arch/x86/mm/numa_64.c

@@ -79,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void)
 		return 0;
 
 	addr = 0x8000;
-	nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
+	nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
 	nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT,
 				      nodemap_size, L1_CACHE_BYTES);
 	if (nodemap_addr == -1UL) {
@@ -176,10 +176,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
-	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+	const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
 	int nid;
 
-	start = round_up(start, ZONE_ALIGN);
+	start = roundup(start, ZONE_ALIGN);
 
 	printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
 	       start, end);
@@ -210,9 +210,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
 	nid = phys_to_nid(nodedata_phys);
 	if (nid == nodeid)
-		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+		bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
 	else
-		bootmap_start = round_up(start, PAGE_SIZE);
+		bootmap_start = roundup(start, PAGE_SIZE);
 	/*
 	 * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
 	 * to use that to align to PAGE_SIZE

+ 1 - 1
arch/x86/mm/pageattr.c

@@ -84,7 +84,7 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
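
The round_up() -> roundup() conversions throughout this series swap the
bitmask-based helper for the generic one from kernel.h, which divides and
multiplies and therefore also accepts non-power-of-two alignments:

#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))

/* roundup(5000, 4096) == 8192, roundup(4096, 4096) == 4096 */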

+ 1 - 1
arch/x86/pci/amd_bus.c

@@ -580,7 +580,7 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self,
 				    unsigned long action, void *hcpu)
 {
 	int cpu = (long)hcpu;
-	switch(action) {
+	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0);

+ 38 - 29
arch/x86/pci/irq.c

@@ -1043,35 +1043,44 @@ static void __init pcibios_fixup_irqs(void)
 		if (io_apic_assign_pci_irqs) {
 			int irq;
 
-			if (pin) {
-				/*
-				 * interrupt pins are numbered starting
-				 * from 1
-				 */
-				pin--;
-				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-					PCI_SLOT(dev->devfn), pin);
-	/*
-	 * Busses behind bridges are typically not listed in the MP-table.
-	 * In this case we have to look up the IRQ based on the parent bus,
-	 * parent slot, and pin number. The SMP code detects such bridged
-	 * busses itself so we should get into this branch reliably.
-	 */
-				if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
-					struct pci_dev *bridge = dev->bus->self;
-
-					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
-					irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-							PCI_SLOT(bridge->devfn), pin);
-					if (irq >= 0)
-						dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
-							 pci_name(bridge),
-							 'A' + pin, irq);
-				}
-				if (irq >= 0) {
-					dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
-					dev->irq = irq;
-				}
+			if (!pin)
+				continue;
+
+			/*
+			 * interrupt pins are numbered starting from 1
+			 */
+			pin--;
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+				PCI_SLOT(dev->devfn), pin);
+			/*
+			 * Busses behind bridges are typically not listed in the
+			 * MP-table.  In this case we have to look up the IRQ
+			 * based on the parent bus, parent slot, and pin number.
+			 * The SMP code detects such bridged busses itself so we
+			 * should get into this branch reliably.
+			 */
+			if (irq < 0 && dev->bus->parent) {
+				/* go back to the bridge */
+				struct pci_dev *bridge = dev->bus->self;
+				int bus;
+
+				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+				bus = bridge->bus->number;
+				irq = IO_APIC_get_PCI_irq_vector(bus,
+						PCI_SLOT(bridge->devfn), pin);
+				if (irq >= 0)
+					dev_warn(&dev->dev,
+						"using bridge %s INT %c to "
+							"get IRQ %d\n",
+						 pci_name(bridge),
+						 'A' + pin, irq);
+			}
+			if (irq >= 0) {
+				dev_info(&dev->dev,
+					"PCI->APIC IRQ transform: INT %c "
+						"-> IRQ %d\n",
+					'A' + pin, irq);
+				dev->irq = irq;
 			}
 		}
 #endif
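
The "(pin + PCI_SLOT(...)) % 4" expression above is the standard PCI bridge
swizzle: each slot behind a bridge rotates INTA..INTD by its device number.
A worked example (pins counted from 0 after the "pin--" above):

	/* device in slot 3 behind a bridge, using INTB (pin 1): */
	pin = (1 + 3) % 4;	/* == 0, i.e. look up INTA at the bridge */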

+ 7 - 0
arch/x86/power/cpu_32.c

@@ -11,6 +11,7 @@
 #include <linux/suspend.h>
 #include <asm/mtrr.h>
 #include <asm/mce.h>
+#include <asm/xcr.h>
 
 static struct saved_context saved_context;
 
@@ -126,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	if (boot_cpu_has(X86_FEATURE_SEP))
 		enable_sep_cpu();
 
+	/*
+	 * Restore XCR0 for xsave-capable CPUs.
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
 	fix_processor_context();
 	do_fpu_end();
 	mtrr_ap_init();

+ 7 - 0
arch/x86/power/cpu_64.c

@@ -14,6 +14,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/mtrr.h>
+#include <asm/xcr.h>
 
 static void fix_processor_context(void);
 
@@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	wrmsrl(MSR_GS_BASE, ctxt->gs_base);
 	wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
 
+	/*
+	 * Restore XCR0 for xsave-capable CPUs.
+	 */
+	if (cpu_has_xsave)
+		xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+
 	fix_processor_context();
 
 	do_fpu_end();
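
xsetbv() itself (declared in the new asm/xcr.h) boils down to the XSETBV
instruction, which loads EDX:EAX into the extended control register
selected by ECX.  A sketch of such a wrapper, using the raw opcode as
assemblers without XSETBV support require:

static inline void xsetbv_sketch(u32 index, u64 value)
{
	asm volatile(".byte 0x0f,0x01,0xd1"	/* xsetbv */
		     : : "a" ((u32)value), "d" ((u32)(value >> 32)),
		         "c" (index));
}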

+ 7 - 7
arch/x86/power/hibernate_asm_32.S

@@ -1,5 +1,3 @@
-.text
-
 /*
  * This may not use any stack, nor any variable that is not "NoSave":
  *
@@ -12,17 +10,18 @@
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
-	.text
+.text
 
 ENTRY(swsusp_arch_suspend)
-
 	movl %esp, saved_context_esp
 	movl %ebx, saved_context_ebx
 	movl %ebp, saved_context_ebp
 	movl %esi, saved_context_esi
 	movl %edi, saved_context_edi
-	pushfl ; popl saved_context_eflags
+	pushfl
+	popl saved_context_eflags
 
 	call swsusp_save
 	ret
@@ -59,7 +58,7 @@ done:
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %edx
-	andl	$~(1<<7), %edx;  # PGE
+	andl	$~(X86_CR4_PGE), %edx
 	movl	%edx, %cr4;  # turn off PGE
 1:
 	movl	%cr3, %eax;  # flush TLB
@@ -74,7 +73,8 @@ done:
 	movl saved_context_esi, %esi
 	movl saved_context_edi, %edi
 
-	pushl saved_context_eflags ; popfl
+	pushl saved_context_eflags
+	popfl
 
 	xorl	%eax, %eax
 
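
X86_CR4_PGE, now used in place of the magic "1<<7", is bit 7 of CR4 (global
pages) and is defined in processor-flags.h along the lines of:

#define X86_CR4_PGE	0x00000080	/* enable global pages */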

+ 41 - 4
arch/x86/xen/enlighten.c

@@ -36,6 +36,7 @@
 #include <xen/hvc-console.h>
 
 #include <asm/paravirt.h>
+#include <asm/apic.h>
 #include <asm/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static u32 xen_apic_read(unsigned long reg)
+static u32 xen_apic_read(u32 reg)
 {
 	return 0;
 }
 
-static void xen_apic_write(unsigned long reg, u32 val)
+static void xen_apic_write(u32 reg, u32 val)
 {
 	/* Warn to see if there are any stray references */
 	WARN_ON(1);
 }
+
+static u64 xen_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void xen_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there are any stray references */
+	WARN_ON(1);
+}
+
+static void xen_apic_wait_icr_idle(void)
+{
+	return;
+}
+
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static struct apic_ops xen_basic_apic_ops = {
+	.read = xen_apic_read,
+	.write = xen_apic_write,
+	.icr_read = xen_apic_icr_read,
+	.icr_write = xen_apic_icr_write,
+	.wait_icr_idle = xen_apic_wait_icr_idle,
+	.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
+};
+
 #endif
 
 static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.apic_write = xen_apic_write,
-	.apic_read = xen_apic_read,
 	.setup_boot_clock = paravirt_nop,
 	.setup_secondary_clock = paravirt_nop,
 	.startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
 
+#ifdef CONFIG_X86_LOCAL_APIC
+	/*
+	 * set up the basic apic ops.
+	 */
+	apic_ops = &xen_basic_apic_ops;
+#endif
+
 	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
 		pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
 		pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;

+ 2 - 0
drivers/pci/Makefile

@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
 # Build Intel IOMMU support
 obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
 
+obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
+
 #
 # Some architectures use the generic PCI setup functions
 #

+ 157 - 0
drivers/pci/dma_remapping.h

@@ -0,0 +1,157 @@
+#ifndef _DMA_REMAPPING_H
+#define _DMA_REMAPPING_H
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K		(12)
+#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64	val;
+	u64	rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+	return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+	root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+	root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+	return (struct context_entry *)
+		(root_present(root)?phys_to_virt(
+		root->val & PAGE_MASK_4K):
+		NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: avail
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi &  7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+	do { \
+		(c).lo &= (((u64)-1) << 4) | 3; \
+		(c).lo |= ((val) & 3) << 2; \
+	} while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+	u64 val;
+};
+#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+	int	id;			/* domain id */
+	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+
+	struct list_head devices; 	/* all devices' list */
+	struct iova_domain iovad;	/* iova's that belong to this domain */
+
+	struct dma_pte	*pgd;		/* virtual address */
+	spinlock_t	mapping_lock;	/* page table lock */
+	int		gaw;		/* max guest address width */
+
+	/* adjusted guest address width, 0 is level 2 30-bit */
+	int		agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+	int		flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+	struct list_head link;	/* link to domain siblings */
+	struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus number */
+	u8 devfn;		/* PCI devfn number */
+	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+	struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+extern void free_dmar_iommu(struct intel_iommu *iommu);
+
+extern int dmar_disabled;
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+	return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+#endif

+ 378 - 19
drivers/pci/dmar.c

@@ -19,13 +19,16 @@
  * Author: Shaohua Li <shaohua.li@intel.com>
  * Author: Shaohua Li <shaohua.li@intel.com>
  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
  *
- * This file implements early detection/parsing of DMA Remapping Devices
+ * This file implements early detection/parsing of Remapping Devices
  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  * tables.
+ *
+ * These routines are used by both DMA-remapping and Interrupt-remapping
  */
 
 #include <linux/pci.h>
 #include <linux/dmar.h>
+#include <linux/timer.h>
 #include "iova.h"
 #include "intel-iommu.h"
 
@@ -37,7 +40,6 @@
  * these units are not supported by the architecture.
  */
 LIST_HEAD(dmar_drhd_units);
-LIST_HEAD(dmar_rmrr_units);
 
 static struct acpi_table_header * __initdata dmar_tbl;
 
 		list_add(&drhd->list, &dmar_drhd_units);
 		list_add(&drhd->list, &dmar_drhd_units);
 }
 
-static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
-{
-	list_add(&rmrr->list, &dmar_rmrr_units);
-}
-
 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
 					   struct pci_dev **dev, u16 segment)
 {
@@ -172,19 +169,37 @@
 	struct acpi_dmar_hardware_unit *drhd;
 	struct dmar_drhd_unit *dmaru;
 	int ret = 0;
-	static int include_all;
 
 	dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
 	if (!dmaru)
 		return -ENOMEM;
 
+	dmaru->hdr = header;
 	drhd = (struct acpi_dmar_hardware_unit *)header;
 	dmaru->reg_base_addr = drhd->address;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 
+	if (ret) {
+		kfree(dmaru);
+		return ret;
+	}
+	dmar_register_drhd_unit(dmaru);
+	return 0;
+}
+
+static int __init
+dmar_parse_dev(struct dmar_drhd_unit *dmaru)
+{
+	struct acpi_dmar_hardware_unit *drhd;
+	static int include_all;
+	int ret;
+
+	drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
+
 	if (!dmaru->include_all)
 	if (!dmaru->include_all)
 		ret = dmar_parse_dev_scope((void *)(drhd + 1),
-				((void *)drhd) + header->length,
+				((void *)drhd) + drhd->header.length,
 				&dmaru->devices_cnt, &dmaru->devices,
 				drhd->segment);
 	else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
 		include_all = 1;
 	}
 
-	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all))
+	if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
+		list_del(&dmaru->list);
 		kfree(dmaru);
-	else
-		dmar_register_drhd_unit(dmaru);
+	}
 	return ret;
 }
 
+LIST_HEAD(dmar_rmrr_units);
+
+static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
+{
+	list_add(&rmrr->list, &dmar_rmrr_units);
+}
+
+
 static int __init
 static int __init
 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
 {
 	struct acpi_dmar_reserved_memory *rmrr;
 	struct dmar_rmrr_unit *rmrru;
-	int ret = 0;
 
 	rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
 	if (!rmrru)
 		return -ENOMEM;
 
+	rmrru->hdr = header;
 	rmrr = (struct acpi_dmar_reserved_memory *)header;
 	rmrru->base_address = rmrr->base_address;
 	rmrru->end_address = rmrr->end_address;
+	dmar_register_rmrr_unit(rmrru);
+	return 0;
+}
+
+static int __init
+rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
+{
+	struct acpi_dmar_reserved_memory *rmrr;
+	int ret;
+
+	rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
 	ret = dmar_parse_dev_scope((void *)(rmrr + 1),
-		((void *)rmrr) + header->length,
+		((void *)rmrr) + rmrr->header.length,
 		&rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
 
-	if (ret || (rmrru->devices_cnt == 0))
+	if (ret || (rmrru->devices_cnt == 0)) {
+		list_del(&rmrru->list);
 		kfree(rmrru);
-	else
-		dmar_register_rmrr_unit(rmrru);
+	}
 	return ret;
 }
+#endif
 
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 	}
 }
 
+
 /**
  * parse_dmar_table - parses the DMA reporting table
 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
 			ret = dmar_parse_one_drhd(entry_header);
 			break;
 		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
+#ifdef CONFIG_DMAR
 			ret = dmar_parse_one_rmrr(entry_header);
+#endif
 			break;
 		default:
 			printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
 	return ret;
 }
 
+int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
+			  struct pci_dev *dev)
+{
+	int index;
+
+	while (dev) {
+		for (index = 0; index < cnt; index++)
+			if (dev == devices[index])
+				return 1;
 
-int __init dmar_table_init(void)
+		/* Check our parent */
+		dev = dev->bus->self;
+	}
+
+	return 0;
+}
+
+struct dmar_drhd_unit *
+dmar_find_matched_drhd_unit(struct pci_dev *dev)
 {
+	struct dmar_drhd_unit *drhd = NULL;
+
+	list_for_each_entry(drhd, &dmar_drhd_units, list) {
+		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
+						drhd->devices_cnt, dev))
+			return drhd;
+	}
+
+	return NULL;
+}
+
+int __init dmar_dev_scope_init(void)
+{
+	struct dmar_drhd_unit *drhd;
+	int ret = -ENODEV;
+
+	for_each_drhd_unit(drhd) {
+		ret = dmar_parse_dev(drhd);
+		if (ret)
+			return ret;
+	}
+
+#ifdef CONFIG_DMAR
+	{
+		struct dmar_rmrr_unit *rmrr;
+		for_each_rmrr_units(rmrr) {
+			ret = rmrr_parse_dev(rmrr);
+			if (ret)
+				return ret;
+		}
+	}
+#endif
+
+	return ret;
+}
 
+
+int __init dmar_table_init(void)
+{
+	static int dmar_table_initialized;
 	int ret;
 
+	if (dmar_table_initialized)
+		return 0;
+
+	dmar_table_initialized = 1;
+
 	ret = parse_dmar_table();
 	if (ret) {
-		printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
+		if (ret != -ENODEV)
+			printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
 		return ret;
 	}
 
@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
 		return -ENODEV;
 	}
 
+#ifdef CONFIG_DMAR
 	if (list_empty(&dmar_rmrr_units))
 		printk(KERN_INFO PREFIX "No RMRR found\n");
+#endif
 
+#ifdef CONFIG_INTR_REMAP
+	parse_ioapics_under_ir();
+#endif
 	return 0;
 }
 
@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
 
 	return (ACPI_SUCCESS(status) ? 1 : 0);
 }
+
+void __init detect_intel_iommu(void)
+{
+	int ret;
+
+	ret = early_dmar_detect();
+
+#ifdef CONFIG_DMAR
+	{
+		struct acpi_table_dmar *dmar;
+		/*
+		 * for now we will disable dma-remapping when interrupt
+		 * remapping is enabled.
+		 * When support for queued invalidation for IOTLB invalidation
+		 * is added, we will not need this any more.
+		 */
+		dmar = (struct acpi_table_dmar *) dmar_tbl;
+		if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
+			printk(KERN_INFO
+			       "Queued invalidation will be enabled to support "
+			       "x2apic and Intr-remapping.\n");
+			printk(KERN_INFO
+			       "Disabling IOMMU detection, because of missing "
+			       "queued invalidation support for IOTLB "
+			       "invalidation\n");
+			printk(KERN_INFO
+			       "Use \"nox2apic\", if you want to use Intel "
+			       "IOMMU for DMA-remapping and don't care about "
+			       "x2apic support\n");
+
+			dmar_disabled = 1;
+			return;
+		}
+
+		if (ret && !no_iommu && !iommu_detected && !swiotlb &&
+		    !dmar_disabled)
+			iommu_detected = 1;
+	}
+#endif
+}
+
+
+int alloc_iommu(struct dmar_drhd_unit *drhd)
+{
+	struct intel_iommu *iommu;
+	int map_size;
+	u32 ver;
+	static int iommu_allocated = 0;
+
+	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	iommu->seq_id = iommu_allocated++;
+
+	iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
+	if (!iommu->reg) {
+		printk(KERN_ERR "IOMMU: can't map the region\n");
+		goto error;
+	}
+	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
+	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
+
+	/* the registers might be more than one page */
+	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
+		cap_max_fault_reg_offset(iommu->cap));
+	map_size = PAGE_ALIGN_4K(map_size);
+	if (map_size > PAGE_SIZE_4K) {
+		iounmap(iommu->reg);
+		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
+		if (!iommu->reg) {
+			printk(KERN_ERR "IOMMU: can't map the region\n");
+			goto error;
+		}
+	}
+
+	ver = readl(iommu->reg + DMAR_VER_REG);
+	pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
+		drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
+		iommu->cap, iommu->ecap);
+
+	spin_lock_init(&iommu->register_lock);
+
+	drhd->iommu = iommu;
+	return 0;
+error:
+	kfree(iommu);
+	return -1;
+}
+
+void free_iommu(struct intel_iommu *iommu)
+{
+	if (!iommu)
+		return;
+
+#ifdef CONFIG_DMAR
+	free_dmar_iommu(iommu);
+#endif
+
+	if (iommu->reg)
+		iounmap(iommu->reg);
+	kfree(iommu);
+}
+
+/*
+ * Reclaim all the submitted descriptors which have completed their work.
+ */
+static inline void reclaim_free_desc(struct q_inval *qi)
+{
+	while (qi->desc_status[qi->free_tail] == QI_DONE) {
+		qi->desc_status[qi->free_tail] = QI_FREE;
+		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
+		qi->free_cnt++;
+	}
+}
+
+/*
+ * Submit the queued invalidation descriptor to the remapping
+ * hardware unit and wait for its completion.
+ */
+void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+{
+	struct q_inval *qi = iommu->qi;
+	struct qi_desc *hw, wait_desc;
+	int wait_index, index;
+	unsigned long flags;
+
+	if (!qi)
+		return;
+
+	hw = qi->desc;
+
+	spin_lock(&qi->q_lock);
+	while (qi->free_cnt < 3) {
+		spin_unlock(&qi->q_lock);
+		cpu_relax();
+		spin_lock(&qi->q_lock);
+	}
+
+	index = qi->free_head;
+	wait_index = (index + 1) % QI_LENGTH;
+
+	qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
+
+	hw[index] = *desc;
+
+	wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
+
+	hw[wait_index] = wait_desc;
+
+	__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
+	__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
+
+	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
+	qi->free_cnt -= 2;
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+	/*
+	 * update the HW tail register indicating the presence of
+	 * new descriptors.
+	 */
+	writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	while (qi->desc_status[wait_index] != QI_DONE) {
+		spin_unlock(&qi->q_lock);
+		cpu_relax();
+		spin_lock(&qi->q_lock);
+	}
+
+	qi->desc_status[index] = QI_DONE;
+
+	reclaim_free_desc(qi);
+	spin_unlock(&qi->q_lock);
+}
+
+/*
+ * Flush the global interrupt entry cache.
+ */
+void qi_global_iec(struct intel_iommu *iommu)
+{
+	struct qi_desc desc;
+
+	desc.low = QI_IEC_TYPE;
+	desc.high = 0;
+
+	qi_submit_sync(&desc, iommu);
+}
+
+/*
+ * Enable Queued Invalidation interface. This is a must to support
+ * interrupt-remapping. Also used by DMA-remapping, which replaces
+ * register based IOTLB invalidation.
+ */
+int dmar_enable_qi(struct intel_iommu *iommu)
+{
+	u32 cmd, sts;
+	unsigned long flags;
+	struct q_inval *qi;
+
+	if (!ecap_qis(iommu->ecap))
+		return -ENOENT;
+
+	/*
+	 * queued invalidation is already setup and enabled.
+	 */
+	if (iommu->qi)
+		return 0;
+
+	iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
+	if (!iommu->qi)
+		return -ENOMEM;
+
+	qi = iommu->qi;
+
+	qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
+	if (!qi->desc) {
+		kfree(qi);
+		iommu->qi = 0;
+		return -ENOMEM;
+	}
+
+	qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
+	if (!qi->desc_status) {
+		free_page((unsigned long) qi->desc);
+		kfree(qi);
+		iommu->qi = 0;
+		return -ENOMEM;
+	}
+
+	qi->free_head = qi->free_tail = 0;
+	qi->free_cnt = QI_LENGTH;
+
+	spin_lock_init(&qi->q_lock);
+
+	spin_lock_irqsave(&iommu->register_lock, flags);
+	/* write zero to the tail reg */
+	writel(0, iommu->reg + DMAR_IQT_REG);
+
+	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
+
+	cmd = iommu->gcmd | DMA_GCMD_QIE;
+	iommu->gcmd |= DMA_GCMD_QIE;
+	writel(cmd, iommu->reg + DMAR_GCMD_REG);
+
+	/* Make sure hardware complete it */
+	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
+	spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+	return 0;
+}

+ 26 - 161
drivers/pci/intel-iommu.c

@@ -49,8 +49,6 @@
 
 
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
 
 
-#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
-
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
 
 
 
 
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
 
 
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 
 
-static struct intel_iommu *g_iommus;
-
 #define HIGH_WATER_MARK 250
 #define HIGH_WATER_MARK 250
 struct deferred_flush_tables {
 struct deferred_flush_tables {
 	int next;
 	int next;
@@ -80,7 +76,7 @@ static long list_size;
 
 
 static void domain_remove_dev_info(struct dmar_domain *domain);
 static void domain_remove_dev_info(struct dmar_domain *domain);
 
 
-static int dmar_disabled;
+int dmar_disabled;
 static int __initdata dmar_map_gfx = 1;
 static int __initdata dmar_map_gfx = 1;
 static int dmar_forcedac;
 static int dmar_forcedac;
 static int intel_iommu_strict;
 static int intel_iommu_strict;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
 	kmem_cache_free(iommu_iova_cache, iova);
 	kmem_cache_free(iommu_iova_cache, iova);
 }
 }
 
 
-static inline void __iommu_flush_cache(
-	struct intel_iommu *iommu, void *addr, int size)
-{
-	if (!ecap_coherent(iommu->ecap))
-		clflush_cache_range(addr, size);
-}
-
 /* Gets context entry for a given bus and devfn */
 /* Gets context entry for a given bus and devfn */
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 		u8 bus, u8 devfn)
 		u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 	return 0;
 	return 0;
 }
 }
 
 
-#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
-{\
-	cycles_t start_time = get_cycles();\
-	while (1) {\
-		sts = op (iommu->reg + offset);\
-		if (cond)\
-			break;\
-		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
-			panic("DMAR hardware is malfunctioning\n");\
-		cpu_relax();\
-	}\
-}
-
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
 {
 	void *addr;
 	void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
 
 
+	spin_lock_init(&iommu->lock);
+
 	/*
 	/*
 	 * if Caching mode is set, then invalid translations are tagged
 	 * if Caching mode is set, then invalid translations are tagged
 	 * with domainid 0. Hence we need to pre-allocate it.
 	 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
 		set_bit(0, iommu->domain_ids);
 		set_bit(0, iommu->domain_ids);
 	return 0;
 	return 0;
 }
 }
-static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
-					struct dmar_drhd_unit *drhd)
-{
-	int ret;
-	int map_size;
-	u32 ver;
-
-	iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
-	if (!iommu->reg) {
-		printk(KERN_ERR "IOMMU: can't map the region\n");
-		goto error;
-	}
-	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
-	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
-
-	/* the registers might be more than one page */
-	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
-		cap_max_fault_reg_offset(iommu->cap));
-	map_size = PAGE_ALIGN_4K(map_size);
-	if (map_size > PAGE_SIZE_4K) {
-		iounmap(iommu->reg);
-		iommu->reg = ioremap(drhd->reg_base_addr, map_size);
-		if (!iommu->reg) {
-			printk(KERN_ERR "IOMMU: can't map the region\n");
-			goto error;
-		}
-	}
-
-	ver = readl(iommu->reg + DMAR_VER_REG);
-	pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
-		drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
-		iommu->cap, iommu->ecap);
-	ret = iommu_init_domains(iommu);
-	if (ret)
-		goto error_unmap;
-	spin_lock_init(&iommu->lock);
-	spin_lock_init(&iommu->register_lock);
 
 
-	drhd->iommu = iommu;
-	return iommu;
-error_unmap:
-	iounmap(iommu->reg);
-error:
-	kfree(iommu);
-	return NULL;
-}
 
 
 static void domain_exit(struct dmar_domain *domain);
 static void domain_exit(struct dmar_domain *domain);
-static void free_iommu(struct intel_iommu *iommu)
+
+void free_dmar_iommu(struct intel_iommu *iommu)
 {
 {
 	struct dmar_domain *domain;
 	struct dmar_domain *domain;
 	int i;
 	int i;
 
 
-	if (!iommu)
-		return;
-
 	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
 	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
 	for (; i < cap_ndoms(iommu->cap); ) {
 	for (; i < cap_ndoms(iommu->cap); ) {
 		domain = iommu->domains[i];
 		domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
 
 
 	/* free context mapping */
 	/* free context mapping */
 	free_context_table(iommu);
 	free_context_table(iommu);
-
-	if (iommu->reg)
-		iounmap(iommu->reg);
-	kfree(iommu);
 }
 }
 
 
 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
 	return NULL;
 	return NULL;
 }
 }
 
 
-static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
-     struct pci_dev *dev)
-{
-	int index;
-
-	while (dev) {
-		for (index = 0; index < cnt; index++)
-			if (dev == devices[index])
-				return 1;
-
-		/* Check our parent */
-		dev = dev->bus->self;
-	}
-
-	return 0;
-}
-
-static struct dmar_drhd_unit *
-dmar_find_matched_drhd_unit(struct pci_dev *dev)
-{
-	struct dmar_drhd_unit *drhd = NULL;
-
-	list_for_each_entry(drhd, &dmar_drhd_units, list) {
-		if (drhd->include_all || dmar_pci_device_match(drhd->devices,
-						drhd->devices_cnt, dev))
-			return drhd;
-	}
-
-	return NULL;
-}
-
 /* domain is initialized */
 /* domain is initialized */
 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
 {
 {
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
 	 * endfor
 	 * endfor
 	 */
 	 */
 	for_each_drhd_unit(drhd) {
 	for_each_drhd_unit(drhd) {
-		if (drhd->ignored)
-			continue;
 		g_num_of_iommus++;
 		/*
 		 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
 		 */
 	}

-	g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
-	if (!g_iommus) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
 	deferred_flush = kzalloc(g_num_of_iommus *
 		sizeof(struct deferred_flush_tables), GFP_KERNEL);
 	if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
 		goto error;
 	}

-	i = 0;
 	for_each_drhd_unit(drhd) {
 		if (drhd->ignored)
 			continue;
-		iommu = alloc_iommu(&g_iommus[i], drhd);
-		i++;
-		if (!iommu) {
-			ret = -ENOMEM;
+
+		iommu = drhd->iommu;
+
+		ret = iommu_init_domains(iommu);
+		if (ret)
 			goto error;
-		}

 		/*
 		 * TBD:
@@ -1845,7 +1732,6 @@ int __init init_dmars(void)
 		iommu = drhd->iommu;
 		free_iommu(iommu);
 	}
-	kfree(g_iommus);
 	return ret;
 }

@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
 	/* just flush them all */
 	for (i = 0; i < g_num_of_iommus; i++) {
 		if (deferred_flush[i].next) {
-			iommu_flush_iotlb_global(&g_iommus[i], 0);
+			struct intel_iommu *iommu =
+				deferred_flush[i].domain[0]->iommu;
+
+			iommu_flush_iotlb_global(iommu, 0);
 			for (j = 0; j < deferred_flush[i].next; j++) {
 				__free_iova(&deferred_flush[i].domain[j]->iovad,
 						deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
 	if (list_size == HIGH_WATER_MARK)
 		flush_unmaps();

-	iommu_id = dom->iommu - g_iommus;
+	iommu_id = dom->iommu->seq_id;
+
 	next = deferred_flush[iommu_id].next;
 	deferred_flush[iommu_id].domain[next] = dom;
 	deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,38 +2238,6 @@ static void __init iommu_exit_mempool(void)

 }

-static int blacklist_iommu(const struct dmi_system_id *id)
-{
-	printk(KERN_INFO "%s detected; disabling IOMMU\n",
-	       id->ident);
-	dmar_disabled = 1;
-	return 0;
-}
-
-static struct dmi_system_id __initdata intel_iommu_dmi_table[] = {
-	{	/* Some DG33BU BIOS revisions advertised non-existent VT-d */
-		.callback = blacklist_iommu,
-		.ident = "Intel DG33BU",
-		{	DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
-			DMI_MATCH(DMI_BOARD_NAME, "DG33BU"),
-		}
-	},
-	{ }
-};
-
-
-void __init detect_intel_iommu(void)
-{
-	if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
-		return;
-	if (early_dmar_detect()) {
-		dmi_check_system(intel_iommu_dmi_table);
-		if (dmar_disabled)
-			return;
-		iommu_detected = 1;
-	}
-}
-
 static void __init init_no_remapping_devices(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -2426,12 +2284,19 @@ int __init intel_iommu_init(void)
 {
 	int ret = 0;

-	if (no_iommu || swiotlb || dmar_disabled)
-		return -ENODEV;
-
 	if (dmar_table_init())
 		return 	-ENODEV;

+	if (dmar_dev_scope_init())
+		return 	-ENODEV;
+
+	/*
+	 * Check the need for DMA-remapping initialization now.
+	 * Above initialization will also be used by Interrupt-remapping.
+	 */
+	if (no_iommu || swiotlb || dmar_disabled)
+		return -ENODEV;
+
 	iommu_init_mempool();
 	dmar_init_reserved_ranges();


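A note on the deferred-flush rework above: with the global g_iommus[] array gone, add_unmap() can no longer derive a table index by pointer arithmetic (dom->iommu - g_iommus); instead each IOMMU carries a stable seq_id assigned at enumeration time, and flush_unmaps() recovers the IOMMU from the first queued domain. Below is a minimal sketch of the indexing scheme only; the names MAX_IOMMUS and demo_* are illustrative and not part of the patch, which sizes its tables at runtime from g_num_of_iommus.

/* Sketch only: per-IOMMU deferred-flush state indexed by seq_id. */
#define MAX_IOMMUS	32	/* illustrative bound, not from the patch */

struct demo_iommu {
	int seq_id;		/* assigned once, in DRHD enumeration order */
};

struct demo_flush_table {
	int next;		/* number of queued unmaps for this IOMMU */
};

static struct demo_flush_table deferred[MAX_IOMMUS];

static void demo_add_unmap(struct demo_iommu *iommu)
{
	/* Valid no matter how the iommu object itself was allocated. */
	deferred[iommu->seq_id].next++;
}
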
drivers/pci/intel-iommu.h (+98 -135)

@@ -27,19 +27,8 @@
 #include <linux/sysdev.h>
 #include "iova.h"
 #include <linux/io.h>
-
-/*
- * We need a fixed PAGE_SIZE of 4K irrespective of
- * arch PAGE_SIZE for IOMMU page tables.
- */
-#define PAGE_SHIFT_4K		(12)
-#define PAGE_SIZE_4K		(1UL << PAGE_SHIFT_4K)
-#define PAGE_MASK_4K		(((u64)-1) << PAGE_SHIFT_4K)
-#define PAGE_ALIGN_4K(addr)	(((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
-
-#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT_4K)
-#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
-#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)
+#include <asm/cacheflush.h>
+#include "dma_remapping.h"

 /*
  * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
 #define	DMAR_PLMLIMIT_REG 0x6c	/* PMRR low limit */
 #define	DMAR_PHMBASE_REG 0x70	/* pmrr high base addr */
 #define	DMAR_PHMLIMIT_REG 0x78	/* pmrr high limit */
+#define DMAR_IQH_REG	0x80	/* Invalidation queue head register */
+#define DMAR_IQT_REG	0x88	/* Invalidation queue tail register */
+#define DMAR_IQA_REG	0x90	/* Invalidation queue addr register */
+#define DMAR_ICS_REG	0x98	/* Invalidation complete status register */
+#define DMAR_IRTA_REG	0xb8    /* Interrupt remapping table addr register */

 #define OFFSET_STRIDE		(9)
 /*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define ecap_max_iotlb_offset(e) \
 	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
 #define ecap_coherent(e)	((e) & 0x1)
+#define ecap_qis(e)		((e) & 0x2)
+#define ecap_eim_support(e)	((e >> 4) & 0x1)
+#define ecap_ir_support(e)	((e >> 3) & 0x1)
+#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)


 /* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
 #define DMA_TLB_MAX_SIZE (0x3f)

+/* INVALID_DESC */
+#define DMA_ID_TLB_GLOBAL_FLUSH	(((u64)1) << 3)
+#define DMA_ID_TLB_DSI_FLUSH	(((u64)2) << 3)
+#define DMA_ID_TLB_PSI_FLUSH	(((u64)3) << 3)
+#define DMA_ID_TLB_READ_DRAIN	(((u64)1) << 7)
+#define DMA_ID_TLB_WRITE_DRAIN	(((u64)1) << 6)
+#define DMA_ID_TLB_DID(id)	(((u64)((id & 0xffff) << 16)))
+#define DMA_ID_TLB_IH_NONLEAF	(((u64)1) << 6)
+#define DMA_ID_TLB_ADDR(addr)	(addr)
+#define DMA_ID_TLB_ADDR_MASK(mask)	(mask)
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM (((u32)1)<<31)
 #define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GCMD_SFL (((u32)1) << 29)
 #define DMA_GCMD_EAFL (((u32)1) << 28)
 #define DMA_GCMD_WBF (((u32)1) << 27)
+#define DMA_GCMD_QIE (((u32)1) << 26)
+#define DMA_GCMD_SIRTP (((u32)1) << 24)
+#define DMA_GCMD_IRE (((u32) 1) << 25)

 /* GSTS_REG */
 #define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GSTS_FLS (((u32)1) << 29)
 #define DMA_GSTS_AFLS (((u32)1) << 28)
 #define DMA_GSTS_WBFS (((u32)1) << 27)
+#define DMA_GSTS_QIES (((u32)1) << 26)
+#define DMA_GSTS_IRTPS (((u32)1) << 24)
+#define DMA_GSTS_IRES (((u32)1) << 25)

 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define dma_frcd_source_id(c) (c & 0xffff)
 #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */

-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	val;
-	u64	rsvd1;
-};
-#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val |= value & PAGE_MASK_4K;
+#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
+
+#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
+{\
+	cycles_t start_time = get_cycles();\
+	while (1) {\
+		sts = op (iommu->reg + offset);\
+		if (cond)\
+			break;\
+		if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
+			panic("DMAR hardware is malfunctioning\n");\
+		cpu_relax();\
+	}\
 }

-struct context_entry;
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & PAGE_MASK_4K):
-		NULL);
-}
-
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
-#define context_present(c) ((c).lo & 1)
-#define context_fault_disable(c) (((c).lo >> 1) & 1)
-#define context_translation_type(c) (((c).lo >> 2) & 3)
-#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
-#define context_address_width(c) ((c).hi &  7)
-#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
-
-#define context_set_present(c) do {(c).lo |= 1;} while (0)
-#define context_set_fault_enable(c) \
-	do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
-#define context_set_translation_type(c, val) \
-	do { \
-		(c).lo &= (((u64)-1) << 4) | 3; \
-		(c).lo |= ((val) & 3) << 2; \
-	} while (0)
-#define CONTEXT_TT_MULTI_LEVEL 0
-#define context_set_address_root(c, val) \
-	do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
-#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
-#define context_set_domain_id(c, val) \
-	do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
-#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+#define QI_LENGTH	256	/* queue length */

-/*
- * 0: readable
- * 1: writable
- * 2-6: reserved
- * 7: super page
- * 8-11: available
- * 12-63: Host physcial address
- */
-struct dma_pte {
-	u64 val;
+enum {
+	QI_FREE,
+	QI_IN_USE,
+	QI_DONE
 };
-#define dma_clear_pte(p)	do {(p).val = 0;} while (0)
-
-#define DMA_PTE_READ (1)
-#define DMA_PTE_WRITE (2)

-#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
-#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
-#define dma_set_pte_prot(p, prot) \
-		do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
-#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
-#define dma_set_pte_addr(p, addr) do {\
-		(p).val |= ((addr) & PAGE_MASK_4K); } while (0)
-#define dma_pte_present(p) (((p).val & 3) != 0)
+#define QI_CC_TYPE		0x1
+#define QI_IOTLB_TYPE		0x2
+#define QI_DIOTLB_TYPE		0x3
+#define QI_IEC_TYPE		0x4
+#define QI_IWD_TYPE		0x5

-struct intel_iommu;
+#define QI_IEC_SELECTIVE	(((u64)1) << 4)
+#define QI_IEC_IIDEX(idx)	(((u64)(idx & 0xffff) << 32))
+#define QI_IEC_IM(m)		(((u64)(m & 0x1f) << 27))

-struct dmar_domain {
-	int	id;			/* domain id */
-	struct intel_iommu *iommu;	/* back pointer to owning iommu */
+#define QI_IWD_STATUS_DATA(d)	(((u64)d) << 32)
+#define QI_IWD_STATUS_WRITE	(((u64)1) << 5)

-	struct list_head devices; 	/* all devices' list */
-	struct iova_domain iovad;	/* iova's that belong to this domain */
+struct qi_desc {
+	u64 low, high;
+};

-	struct dma_pte	*pgd;		/* virtual address */
-	spinlock_t	mapping_lock;	/* page table lock */
-	int		gaw;		/* max guest address width */
+struct q_inval {
+	spinlock_t      q_lock;
+	struct qi_desc  *desc;          /* invalidation queue */
+	int             *desc_status;   /* desc status */
+	int             free_head;      /* first free entry */
+	int             free_tail;      /* last free entry */
+	int             free_cnt;
+};

-	/* adjusted guest address width, 0 is level 2 30-bit */
-	int		agaw;
+#ifdef CONFIG_INTR_REMAP
+/* 1MB - maximum possible interrupt remapping table size */
+#define INTR_REMAP_PAGE_ORDER	8
+#define INTR_REMAP_TABLE_REG_SIZE	0xf

-#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
-	int		flags;
-};
+#define INTR_REMAP_TABLE_ENTRIES	65536

-/* PCI domain-device relationship */
-struct device_domain_info {
-	struct list_head link;	/* link to domain siblings */
-	struct list_head global; /* link to global list */
-	u8 bus;			/* PCI bus numer */
-	u8 devfn;		/* PCI devfn number */
-	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
-	struct dmar_domain *domain; /* pointer to domain */
+struct ir_table {
+	struct irte *base;
 };
-
-extern int init_dmars(void);
+#endif

 struct intel_iommu {
 	void __iomem	*reg; /* Pointer to hardware regs, virtual addr */
 	u64		cap;
 	u64		ecap;
-	unsigned long 	*domain_ids; /* bitmap of domains */
-	struct dmar_domain **domains; /* ptr to domains */
 	int		seg;
 	u32		gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
-	spinlock_t	lock; /* protect context, domain ids */
 	spinlock_t	register_lock; /* protect register handling */
+	int		seq_id;	/* sequence id of the iommu */
+
+#ifdef CONFIG_DMAR
+	unsigned long 	*domain_ids; /* bitmap of domains */
+	struct dmar_domain **domains; /* ptr to domains */
+	spinlock_t	lock; /* protect context, domain ids */
 	struct root_entry *root_entry; /* virtual address */

 	unsigned int irq;
 	unsigned char name[7];    /* Device Name */
 	struct msi_msg saved_msg;
 	struct sys_device sysdev;
+#endif
+	struct q_inval  *qi;            /* Queued invalidation info */
+#ifdef CONFIG_INTR_REMAP
+	struct ir_table *ir_table;	/* Interrupt remapping info */
+#endif
 };

-#ifndef CONFIG_DMAR_GFX_WA
-static inline void iommu_prepare_gfx_mapping(void)
+static inline void __iommu_flush_cache(
+	struct intel_iommu *iommu, void *addr, int size)
 {
-	return;
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(addr, size);
 }
-#endif /* !CONFIG_DMAR_GFX_WA */

+extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
+
+extern int alloc_iommu(struct dmar_drhd_unit *drhd);
+extern void free_iommu(struct intel_iommu *iommu);
+extern int dmar_enable_qi(struct intel_iommu *iommu);
+extern void qi_global_iec(struct intel_iommu *iommu);
+
+extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
 #endif

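On the new IOMMU_WAIT_OP() helper defined above: it busy-polls a DMAR register with cpu_relax() until the caller's condition holds, and panics if DMAR_OPERATION_TIMEOUT worth of TSC cycles elapses first. The following is a hedged sketch of the intended call pattern, loosely modeled on enabling queued invalidation; the function name is hypothetical, and DMAR_GCMD_REG is the global command register from the same header (not shown in these hunks).

/* Sketch: flip a command bit, then wait for hardware to acknowledge it. */
static void demo_enable_qi(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);

	/* Request queued invalidation via the global command register. */
	iommu->gcmd |= DMA_GCMD_QIE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Spin on DMAR_GSTS_REG until hardware acknowledges with QIES. */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}
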
Some files were not shown because too many files changed in this diff