Jelajahi Sumber

Merge branch 'x86-64'

* x86-64: (83 commits)
  [PATCH] x86_64: x86_64 stack usage debugging
  [PATCH] x86_64: (resend) x86_64 stack overflow debugging
  [PATCH] x86_64: msi_apic.c build fix
  [PATCH] x86_64: i386/x86-64 Add nmi watchdog support for new Intel CPUs
  [PATCH] x86_64: Avoid broadcasting NMI IPIs
  [PATCH] x86_64: fix apic error on bootup
  [PATCH] x86_64: enlarge window for stack growth
  [PATCH] x86_64: Minor string functions optimizations
  [PATCH] x86_64: Move export symbols to their C functions
  [PATCH] x86_64: Standardize i386/x86_64 handling of NMI_VECTOR
  [PATCH] x86_64: Fix modular pc speaker
  [PATCH] x86_64: remove sys32_ni_syscall()
  [PATCH] x86_64: Do not use -ffunction-sections for modules
  [PATCH] x86_64: Add cpu_relax to apic_wait_icr_idle
  [PATCH] x86_64: adjust kstack_depth_to_print default
  [PATCH] i386/x86-64: adjust /proc/interrupts column headings
  [PATCH] x86_64: Fix race in cpu_local_* on preemptible kernels
  [PATCH] x86_64: Fix fast check in safe_smp_processor_id
  [PATCH] x86_64: x86_64 setup.c - printing cmp related boottime information
  [PATCH] i386/x86-64/ia64: Move polling flag into thread_info_status
  ...

Manual resolve of trivial conflict in arch/i386/kernel/Makefile
Linus Torvalds 19 tahun lalu
induk
melakukan
81a07d7588
100 mengubah file dengan 3636 tambahan dan 1125 penghapusan
  1. 21 0
      Documentation/x86_64/boot-options.txt
  2. 13 0
      arch/i386/Kconfig
  3. 7 2
      arch/i386/boot/Makefile
  4. 12 20
      arch/i386/boot/compressed/misc.c
  5. 4 0
      arch/i386/kernel/Makefile
  6. 85 33
      arch/i386/kernel/alternative.c
  7. 14 2
      arch/i386/kernel/apic.c
  8. 3 3
      arch/i386/kernel/apm.c
  9. 10 6
      arch/i386/kernel/cpu/amd.c
  10. 6 0
      arch/i386/kernel/cpu/intel.c
  11. 100 13
      arch/i386/kernel/cpu/intel_cacheinfo.c
  12. 1 6
      arch/i386/kernel/crash.c
  13. 252 11
      arch/i386/kernel/entry.S
  14. 28 21
      arch/i386/kernel/io_apic.c
  15. 1 1
      arch/i386/kernel/irq.c
  16. 65 7
      arch/i386/kernel/nmi.c
  17. 4 4
      arch/i386/kernel/process.c
  18. 11 1
      arch/i386/kernel/smp.c
  19. 1 0
      arch/i386/kernel/smpboot.c
  20. 60 10
      arch/i386/kernel/traps.c
  21. 9 0
      arch/i386/kernel/vmlinux.lds.S
  22. 1 0
      arch/i386/oprofile/op_model_athlon.c
  23. 1 0
      arch/i386/oprofile/op_model_p4.c
  24. 1 0
      arch/i386/oprofile/op_model_ppro.c
  25. 2 2
      arch/ia64/kernel/process.c
  26. 38 13
      arch/x86_64/Kconfig
  27. 17 1
      arch/x86_64/Kconfig.debug
  28. 3 1
      arch/x86_64/Makefile
  29. 7 2
      arch/x86_64/boot/Makefile
  30. 23 23
      arch/x86_64/boot/compressed/misc.c
  31. 2 4
      arch/x86_64/boot/tools/build.c
  32. 134 25
      arch/x86_64/defconfig
  33. 0 1
      arch/x86_64/ia32/fpu32.c
  34. 0 2
      arch/x86_64/ia32/ia32_signal.c
  35. 5 6
      arch/x86_64/ia32/ia32entry.S
  36. 35 8
      arch/x86_64/ia32/ptrace32.c
  37. 1 24
      arch/x86_64/ia32/sys_ia32.c
  38. 6 2
      arch/x86_64/kernel/Makefile
  39. 11 15
      arch/x86_64/kernel/aperture.c
  40. 20 12
      arch/x86_64/kernel/apic.c
  41. 2 2
      arch/x86_64/kernel/crash.c
  42. 1 1
      arch/x86_64/kernel/e820.c
  43. 86 27
      arch/x86_64/kernel/entry.S
  44. 17 13
      arch/x86_64/kernel/genapic_flat.c
  45. 0 2
      arch/x86_64/kernel/head64.c
  46. 7 7
      arch/x86_64/kernel/i8259.c
  47. 25 20
      arch/x86_64/kernel/io_apic.c
  48. 28 2
      arch/x86_64/kernel/irq.c
  49. 118 0
      arch/x86_64/kernel/k8.c
  50. 1 1
      arch/x86_64/kernel/mce.c
  51. 323 183
      arch/x86_64/kernel/mce_amd.c
  52. 25 13
      arch/x86_64/kernel/module.c
  53. 76 13
      arch/x86_64/kernel/nmi.c
  54. 1018 0
      arch/x86_64/kernel/pci-calgary.c
  55. 48 7
      arch/x86_64/kernel/pci-dma.c
  56. 54 101
      arch/x86_64/kernel/pci-gart.c
  57. 6 3
      arch/x86_64/kernel/pci-nommu.c
  58. 1 1
      arch/x86_64/kernel/pci-swiotlb.c
  59. 9 7
      arch/x86_64/kernel/process.c
  60. 1 0
      arch/x86_64/kernel/reboot.c
  61. 55 125
      arch/x86_64/kernel/setup.c
  62. 2 1
      arch/x86_64/kernel/setup64.c
  63. 0 3
      arch/x86_64/kernel/signal.c
  64. 7 3
      arch/x86_64/kernel/smp.c
  65. 14 9
      arch/x86_64/kernel/smpboot.c
  66. 202 0
      arch/x86_64/kernel/tce.c
  67. 41 46
      arch/x86_64/kernel/time.c
  68. 72 11
      arch/x86_64/kernel/traps.c
  69. 29 0
      arch/x86_64/kernel/vmlinux.lds.S
  70. 2 2
      arch/x86_64/kernel/vsyscall.c
  71. 5 109
      arch/x86_64/kernel/x8664_ksyms.c
  72. 1 0
      arch/x86_64/lib/csum-partial.c
  73. 1 0
      arch/x86_64/lib/csum-wrappers.c
  74. 5 0
      arch/x86_64/lib/delay.c
  75. 3 1
      arch/x86_64/lib/memmove.c
  76. 11 2
      arch/x86_64/lib/usercopy.c
  77. 5 3
      arch/x86_64/mm/fault.c
  78. 22 26
      arch/x86_64/mm/init.c
  79. 5 0
      arch/x86_64/mm/ioremap.c
  80. 5 5
      arch/x86_64/pci/k8-bus.c
  81. 6 6
      drivers/acpi/processor_idle.c
  82. 2 2
      drivers/char/agp/Kconfig
  83. 27 54
      drivers/char/agp/amd64-agp.c
  84. 1 0
      drivers/pci/msi-apic.c
  85. 4 1
      drivers/scsi/aacraid/comminit.c
  86. 15 1
      fs/compat.c
  87. 2 0
      include/asm-i386/alternative.h
  88. 0 12
      include/asm-i386/apic.h
  89. 1 0
      include/asm-i386/cpufeature.h
  90. 54 0
      include/asm-i386/dwarf2.h
  91. 2 0
      include/asm-i386/hw_irq.h
  92. 19 0
      include/asm-i386/intel_arch_perfmon.h
  93. 1 0
      include/asm-i386/k8.h
  94. 20 6
      include/asm-i386/local.h
  95. 5 2
      include/asm-i386/mach-default/mach_ipi.h
  96. 21 7
      include/asm-i386/nmi.h
  97. 2 1
      include/asm-i386/processor.h
  98. 4 3
      include/asm-i386/thread_info.h
  99. 98 0
      include/asm-i386/unwind.h
  100. 5 0
      include/asm-ia64/thread_info.h

+ 21 - 0
Documentation/x86_64/boot-options.txt

@@ -205,6 +205,27 @@ IOMMU
  pages  Prereserve that many 128K pages for the software IO bounce buffering.
  force  Force all IO through the software TLB.
 
+  calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
+  calgary=[translate_empty_slots]
+  calgary=[disable=<PCI bus number>]
+
+    64k,...,8M - Set the size of each PCI slot's translation table
+    when using the Calgary IOMMU. This is the size of the translation
+    table itself in main memory. The smallest table, 64k, covers an IO
+    space of 32MB; the largest, 8MB table, can cover an IO space of
+    4GB. Normally the kernel will make the right choice by itself.
+
+    translate_empty_slots - Enable translation even on slots that have
+    no devices attached to them, in case a device will be hotplugged
+    in the future.
+
+    disable=<PCI bus number> - Disable translation on a given PHB. For
+    example, the built-in graphics adapter resides on the first bridge
+    (PCI bus number 0); if translation (isolation) is enabled on this
+    bridge, X servers that access the hardware directly from user
+    space might stop working. Use this option if you have devices that
+    are accessed from userspace directly on some PCI host bridge.
+
 Debugging
 
   oops=panic Always panic on oopses. Default is to just kill the process,

+ 13 - 0
arch/i386/Kconfig

@@ -328,6 +328,15 @@ config X86_MCE_P4THERMAL
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
 
+config VM86
+	default y
+	bool "Enable VM86 support" if EMBEDDED
+	help
+          This option is required by programs like DOSEMU to run 16-bit legacy
+	  code on X86 processors. It also may be needed by software like
+          XFree86 to initialize some video cards via BIOS. Disabling this
+          option saves about 6k.
+
 config TOSHIBA
 	tristate "Toshiba Laptop support"
 	---help---
@@ -1068,6 +1077,10 @@ config SCx200HR_TIMER
 	  processor goes idle (as is done by the scheduler).  The
 	  other workaround is idle=poll boot option.
 
+config K8_NB
+	def_bool y
+	depends on AGP_AMD64
+
 source "drivers/pcmcia/Kconfig"
 
 source "drivers/pci/hotplug/Kconfig"

+ 7 - 2
arch/i386/boot/Makefile

@@ -109,8 +109,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
 isoimage: $(BOOTIMAGE)
 	-rm -rf $(obj)/isoimage
 	mkdir $(obj)/isoimage
-	cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
-		$(obj)/isoimage
+	for i in lib lib64 share end ; do \
+		if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+			cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+			break ; \
+		fi ; \
+		if [ $$i = end ] ; then exit 1 ; fi ; \
+	done
 	cp $(BOOTIMAGE) $(obj)/isoimage/linux
 	echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
 	if [ -f '$(FDINITRD)' ] ; then \

+ 12 - 20
arch/i386/boot/compressed/misc.c

@@ -24,14 +24,6 @@
 
 
 #undef memset
 #undef memcpy
-
-/*
- * Why do we do this? Don't ask me..
- *
- * Incomprehensible are the ways of bootloaders.
- */
-static void* memset(void *, int, size_t);
-static void* memcpy(void *, __const void *, size_t);
 #define memzero(s, n)     memset ((s), 0, (n))
 
 typedef unsigned char  uch;
@@ -93,7 +85,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
 #endif
 #endif
 #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
 #define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
 
 
-extern char input_data[];
+extern unsigned char input_data[];
 extern int input_len;
 extern int input_len;
 
 
 static long bytes_out = 0;
 static long bytes_out = 0;
@@ -103,6 +95,9 @@ static unsigned long output_ptr = 0;
 static void *malloc(int size);
 static void *malloc(int size);
 static void free(void *where);
 static void free(void *where);
 
 
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
+
 static void putstr(const char *);
 static void putstr(const char *);
 
 
 extern int end;
 extern int end;
@@ -205,7 +200,7 @@ static void putstr(const char *s)
 	outb_p(0xff & (pos >> 1), vidport+1);
 	outb_p(0xff & (pos >> 1), vidport+1);
 }
 }
 
 
-static void* memset(void* s, int c, size_t n)
+static void* memset(void* s, int c, unsigned n)
 {
 {
 	int i;
 	int i;
 	char *ss = (char*)s;
 	char *ss = (char*)s;
@@ -214,14 +209,13 @@ static void* memset(void* s, int c, size_t n)
 	return s;
 	return s;
 }
 }
 
 
-static void* memcpy(void* __dest, __const void* __src,
-			    size_t __n)
+static void* memcpy(void* dest, const void* src, unsigned n)
 {
 {
 	int i;
 	int i;
-	char *d = (char *)__dest, *s = (char *)__src;
+	char *d = (char *)dest, *s = (char *)src;
 
 
-	for (i=0;i<__n;i++) d[i] = s[i];
-	return __dest;
+	for (i=0;i<n;i++) d[i] = s[i];
+	return dest;
 }
 }
 
 
 /* ===========================================================================
 /* ===========================================================================
@@ -309,7 +303,7 @@ static void setup_normal_output_buffer(void)
 #else
 #else
 	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
 #endif
-	output_data = (char *)__PHYSICAL_START; /* Normally Points to 1M */
+	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
 	free_mem_end_ptr = (long)real_mode;
 	free_mem_end_ptr = (long)real_mode;
 }
 }
 
 
@@ -324,11 +318,9 @@ static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 #ifdef STANDARD_MEMORY_BIOS_CALL
 #ifdef STANDARD_MEMORY_BIOS_CALL
 	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
 	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
 #else
 #else
-	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) <
-			(3*1024))
-		error("Less than 4MB of memory");
+	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
 #endif	
 #endif	
-	mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
 	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
 	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
 	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
 	  ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
 	low_buffer_size = low_buffer_end - LOW_BUFFER_START;
 	low_buffer_size = low_buffer_end - LOW_BUFFER_START;

+ 4 - 0
arch/i386/kernel/Makefile

@@ -37,6 +37,7 @@ obj-$(CONFIG_DOUBLEFAULT) 	+= doublefault.o
 obj-$(CONFIG_VM86)		+= vm86.o
 obj-$(CONFIG_VM86)		+= vm86.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
 obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
+obj-$(CONFIG_K8_NB)		+= k8.o
 
 
 EXTRA_AFLAGS   := -traditional
 EXTRA_AFLAGS   := -traditional
 
 
@@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
 			$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
 			$(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
 	$(call if_changed,syscall)
 	$(call if_changed,syscall)
+
+k8-y                      += ../../x86_64/kernel/k8.o
+

+ 85 - 33
arch/i386/kernel/alternative.c

@@ -4,27 +4,41 @@
 #include <asm/alternative.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/sections.h>
 
 
-#define DEBUG 0
-#if DEBUG
-# define DPRINTK(fmt, args...) printk(fmt, args)
-#else
-# define DPRINTK(fmt, args...)
-#endif
+static int no_replacement    = 0;
+static int smp_alt_once      = 0;
+static int debug_alternative = 0;
+
+static int __init noreplacement_setup(char *s)
+{
+	no_replacement = 1;
+	return 1;
+}
+static int __init bootonly(char *str)
+{
+	smp_alt_once = 1;
+	return 1;
+}
+static int __init debug_alt(char *str)
+{
+	debug_alternative = 1;
+	return 1;
+}
 
 
+__setup("noreplacement", noreplacement_setup);
+__setup("smp-alt-boot", bootonly);
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...) if (debug_alternative) \
+	printk(KERN_DEBUG fmt, args)
+
+#ifdef GENERIC_NOP1
 /* Use inline assembly to define this because the nops are defined
 /* Use inline assembly to define this because the nops are defined
    as inline assembly strings in the include files and we cannot
    as inline assembly strings in the include files and we cannot
    get them easily into strings. */
    get them easily into strings. */
 asm("\t.data\nintelnops: "
 asm("\t.data\nintelnops: "
 	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
 	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
 	GENERIC_NOP7 GENERIC_NOP8);
 	GENERIC_NOP7 GENERIC_NOP8);
-asm("\t.data\nk8nops: "
-	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-	K8_NOP7 K8_NOP8);
-asm("\t.data\nk7nops: "
-	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
-	K7_NOP7 K7_NOP8);
-
-extern unsigned char intelnops[], k8nops[], k7nops[];
+extern unsigned char intelnops[];
 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
 static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
 	NULL,
 	NULL,
 	intelnops,
 	intelnops,
@@ -36,6 +50,13 @@ static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
 };
+#endif
+
+#ifdef K8_NOP1
+asm("\t.data\nk8nops: "
+	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
+	K8_NOP7 K8_NOP8);
+extern unsigned char k8nops[];
 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
 static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
 	NULL,
 	NULL,
 	k8nops,
 	k8nops,
@@ -47,6 +68,13 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
 };
+#endif
+
+#ifdef K7_NOP1
+asm("\t.data\nk7nops: "
+	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
+	K7_NOP7 K7_NOP8);
+extern unsigned char k7nops[];
 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
 static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
 	NULL,
 	NULL,
 	k7nops,
 	k7nops,
@@ -58,6 +86,18 @@ static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
 };
 };
+#endif
+
+#ifdef CONFIG_X86_64
+
+extern char __vsyscall_0;
+static inline unsigned char** find_nop_table(void)
+{
+	return k8_nops;
+}
+
+#else /* CONFIG_X86_64 */
+
 static struct nop {
 static struct nop {
 	int cpuid;
 	int cpuid;
 	unsigned char **noptable;
 	unsigned char **noptable;
@@ -67,14 +107,6 @@ static struct nop {
 	{ -1, NULL }
 	{ -1, NULL }
 };
 };
 
 
-
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
-extern u8 *__smp_locks[], *__smp_locks_end[];
-
-extern u8 __smp_alt_begin[], __smp_alt_end[];
-
-
 static unsigned char** find_nop_table(void)
 static unsigned char** find_nop_table(void)
 {
 {
 	unsigned char **noptable = intel_nops;
 	unsigned char **noptable = intel_nops;
@@ -89,6 +121,14 @@ static unsigned char** find_nop_table(void)
 	return noptable;
 	return noptable;
 }
 }
 
 
+#endif /* CONFIG_X86_64 */
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
+extern u8 *__smp_locks[], *__smp_locks_end[];
+
+extern u8 __smp_alt_begin[], __smp_alt_end[];
+
 /* Replace instructions with better alternatives for this CPU type.
 /* Replace instructions with better alternatives for this CPU type.
    This runs before SMP is initialized to avoid SMP problems with
    This runs before SMP is initialized to avoid SMP problems with
    self modifying code. This implies that assymetric systems where
    self modifying code. This implies that assymetric systems where
@@ -99,6 +139,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 {
 	unsigned char **noptable = find_nop_table();
 	unsigned char **noptable = find_nop_table();
 	struct alt_instr *a;
 	struct alt_instr *a;
+	u8 *instr;
 	int diff, i, k;
 	int diff, i, k;
 
 
 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
 	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
@@ -106,7 +147,16 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 		BUG_ON(a->replacementlen > a->instrlen);
 		BUG_ON(a->replacementlen > a->instrlen);
 		if (!boot_cpu_has(a->cpuid))
 		if (!boot_cpu_has(a->cpuid))
 			continue;
 			continue;
-		memcpy(a->instr, a->replacement, a->replacementlen);
+		instr = a->instr;
+#ifdef CONFIG_X86_64
+		/* vsyscall code is not mapped yet. resolve it manually. */
+		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
+			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
+			DPRINTK("%s: vsyscall fixup: %p => %p\n",
+				__FUNCTION__, a->instr, instr);
+		}
+#endif
+		memcpy(instr, a->replacement, a->replacementlen);
 		diff = a->instrlen - a->replacementlen;
 		diff = a->instrlen - a->replacementlen;
 		/* Pad the rest with nops */
 		/* Pad the rest with nops */
 		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
 		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
@@ -186,14 +236,6 @@ struct smp_alt_module {
 static LIST_HEAD(smp_alt_modules);
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_SPINLOCK(smp_alt);
 static DEFINE_SPINLOCK(smp_alt);
 
 
-static int smp_alt_once = 0;
-static int __init bootonly(char *str)
-{
-	smp_alt_once = 1;
-	return 1;
-}
-__setup("smp-alt-boot", bootonly);
-
 void alternatives_smp_module_add(struct module *mod, char *name,
 void alternatives_smp_module_add(struct module *mod, char *name,
 				 void *locks, void *locks_end,
 				 void *locks, void *locks_end,
 				 void *text,  void *text_end)
 				 void *text,  void *text_end)
@@ -201,6 +243,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 	struct smp_alt_module *smp;
 	struct smp_alt_module *smp;
 	unsigned long flags;
 	unsigned long flags;
 
 
+	if (no_replacement)
+		return;
+
 	if (smp_alt_once) {
 	if (smp_alt_once) {
 		if (boot_cpu_has(X86_FEATURE_UP))
 		if (boot_cpu_has(X86_FEATURE_UP))
 			alternatives_smp_unlock(locks, locks_end,
 			alternatives_smp_unlock(locks, locks_end,
@@ -235,7 +280,7 @@ void alternatives_smp_module_del(struct module *mod)
 	struct smp_alt_module *item;
 	struct smp_alt_module *item;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	if (smp_alt_once)
+	if (no_replacement || smp_alt_once)
 		return;
 		return;
 
 
 	spin_lock_irqsave(&smp_alt, flags);
 	spin_lock_irqsave(&smp_alt, flags);
@@ -256,7 +301,7 @@ void alternatives_smp_switch(int smp)
 	struct smp_alt_module *mod;
 	struct smp_alt_module *mod;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	if (smp_alt_once)
+	if (no_replacement || smp_alt_once)
 		return;
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
 
@@ -285,6 +330,13 @@ void alternatives_smp_switch(int smp)
 
 
 void __init alternative_instructions(void)
 void __init alternative_instructions(void)
 {
 {
+	if (no_replacement) {
+		printk(KERN_INFO "(SMP-)alternatives turned off\n");
+		free_init_pages("SMP alternatives",
+				(unsigned long)__smp_alt_begin,
+				(unsigned long)__smp_alt_end);
+		return;
+	}
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 
 	/* switch to patch-once-at-boottime-only mode and free the
 	/* switch to patch-once-at-boottime-only mode and free the

+ 14 - 2
arch/i386/kernel/apic.c

@@ -36,6 +36,7 @@
 #include <asm/arch_hooks.h>
 #include <asm/arch_hooks.h>
 #include <asm/hpet.h>
 #include <asm/hpet.h>
 #include <asm/i8253.h>
 #include <asm/i8253.h>
+#include <asm/nmi.h>
 
 
 #include <mach_apic.h>
 #include <mach_apic.h>
 #include <mach_apicdef.h>
 #include <mach_apicdef.h>
@@ -156,7 +157,7 @@ void clear_local_APIC(void)
 	maxlvt = get_maxlvt();
 	maxlvt = get_maxlvt();
 
 
 	/*
 	/*
-	 * Masking an LVT entry on a P6 can trigger a local APIC error
+	 * Masking an LVT entry can trigger a local APIC error
 	 * if the vector is zero. Mask LVTERR first to prevent this.
 	 * if the vector is zero. Mask LVTERR first to prevent this.
 	 */
 	 */
 	if (maxlvt >= 3) {
 	if (maxlvt >= 3) {
@@ -1117,7 +1118,18 @@ void disable_APIC_timer(void)
 		unsigned long v;
 		unsigned long v;
 
 
 		v = apic_read(APIC_LVTT);
 		v = apic_read(APIC_LVTT);
-		apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+		/*
+		 * When an illegal vector value (0-15) is written to an LVT
+		 * entry and delivery mode is Fixed, the APIC may signal an
+		 * illegal vector error, with out regard to whether the mask
+		 * bit is set or whether an interrupt is actually seen on input.
+		 *
+		 * Boot sequence might call this function when the LVTT has
+		 * '0' vector value. So make sure vector field is set to
+		 * valid value.
+		 */
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write_around(APIC_LVTT, v);
 	}
 	}
 }
 }
 
 

+ 3 - 3
arch/i386/kernel/apm.c

@@ -764,9 +764,9 @@ static int apm_do_idle(void)
 	int	idled = 0;
 	int	idled = 0;
 	int	polling;
 	int	polling;
 
 
-	polling = test_thread_flag(TIF_POLLING_NRFLAG);
+	polling = !!(current_thread_info()->status & TS_POLLING);
 	if (polling) {
 	if (polling) {
-		clear_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status &= ~TS_POLLING;
 		smp_mb__after_clear_bit();
 		smp_mb__after_clear_bit();
 	}
 	}
 	if (!need_resched()) {
 	if (!need_resched()) {
@@ -774,7 +774,7 @@ static int apm_do_idle(void)
 		ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
 		ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax);
 	}
 	}
 	if (polling)
 	if (polling)
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 
 
 	if (!idled)
 	if (!idled)
 		return 0;
 		return 0;

+ 10 - 6
arch/i386/kernel/cpu/amd.c

@@ -224,15 +224,17 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 
 
 #ifdef CONFIG_X86_HT
 #ifdef CONFIG_X86_HT
 	/*
 	/*
-	 * On a AMD dual core setup the lower bits of the APIC id
-	 * distingush the cores.  Assumes number of cores is a power
-	 * of two.
+	 * On a AMD multi core setup the lower bits of the APIC id
+	 * distingush the cores.
 	 */
 	 */
 	if (c->x86_max_cores > 1) {
 	if (c->x86_max_cores > 1) {
 		int cpu = smp_processor_id();
 		int cpu = smp_processor_id();
-		unsigned bits = 0;
-		while ((1 << bits) < c->x86_max_cores)
-			bits++;
+		unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
+
+		if (bits == 0) {
+			while ((1 << bits) < c->x86_max_cores)
+				bits++;
+		}
 		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
 		cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<<bits)-1);
 		phys_proc_id[cpu] >>= bits;
 		phys_proc_id[cpu] >>= bits;
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
 		printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
@@ -240,6 +242,8 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 	}
 	}
 #endif
 #endif
 
 
+	if (cpuid_eax(0x80000000) >= 0x80000006)
+		num_cache_leaves = 3;
 }
 }
 
 
 static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)

+ 6 - 0
arch/i386/kernel/cpu/intel.c

@@ -122,6 +122,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 
 
 	select_idle_routine(c);
 	select_idle_routine(c);
 	l2 = init_intel_cacheinfo(c);
 	l2 = init_intel_cacheinfo(c);
+	if (c->cpuid_level > 9 ) {
+		unsigned eax = cpuid_eax(10);
+		/* Check for version and the number of counters */
+		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+			set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+	}
 
 
 	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
 	/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
 	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
 	if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)

+ 100 - 13
arch/i386/kernel/cpu/intel_cacheinfo.c

@@ -4,6 +4,7 @@
  *      Changes:
  *      Changes:
  *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
  *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
  *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
+ *	Andi Kleen		: CPUID4 emulation on AMD.
  */
  */
 
 
 #include <linux/init.h>
 #include <linux/init.h>
@@ -130,25 +131,111 @@ struct _cpuid4_info {
 	cpumask_t shared_cpu_map;
 	cpumask_t shared_cpu_map;
 };
 };
 
 
-static unsigned short			num_cache_leaves;
+unsigned short			num_cache_leaves;
+
+/* AMD doesn't have CPUID4. Emulate it here to report the same
+   information to the user.  This makes some assumptions about the machine:
+   No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+
+   In theory the TLBs could be reported as fake type (they are in "dummy").
+   Maybe later */
+union l1_cache {
+	struct {
+		unsigned line_size : 8;
+		unsigned lines_per_tag : 8;
+		unsigned assoc : 8;
+		unsigned size_in_kb : 8;
+	};
+	unsigned val;
+};
+
+union l2_cache {
+	struct {
+		unsigned line_size : 8;
+		unsigned lines_per_tag : 4;
+		unsigned assoc : 4;
+		unsigned size_in_kb : 16;
+	};
+	unsigned val;
+};
+
+static unsigned short assocs[] = {
+	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
+	[8] = 16,
+	[0xf] = 0xffff // ??
+	};
+static unsigned char levels[] = { 1, 1, 2 };
+static unsigned char types[] = { 1, 2, 3 };
+
+static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
+		       union _cpuid4_leaf_ebx *ebx,
+		       union _cpuid4_leaf_ecx *ecx)
+{
+	unsigned dummy;
+	unsigned line_size, lines_per_tag, assoc, size_in_kb;
+	union l1_cache l1i, l1d;
+	union l2_cache l2;
+
+	eax->full = 0;
+	ebx->full = 0;
+	ecx->full = 0;
+
+	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
+	cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
+
+	if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
+		return;
+
+	eax->split.is_self_initializing = 1;
+	eax->split.type = types[leaf];
+	eax->split.level = levels[leaf];
+	eax->split.num_threads_sharing = 0;
+	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+	if (leaf <= 1) {
+		union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+		assoc = l1->assoc;
+		line_size = l1->line_size;
+		lines_per_tag = l1->lines_per_tag;
+		size_in_kb = l1->size_in_kb;
+	} else {
+		assoc = l2.assoc;
+		line_size = l2.line_size;
+		lines_per_tag = l2.lines_per_tag;
+		/* cpu_data has errata corrections for K7 applied */
+		size_in_kb = current_cpu_data.x86_cache_size;
+	}
+
+	if (assoc == 0xf)
+		eax->split.is_fully_associative = 1;
+	ebx->split.coherency_line_size = line_size - 1;
+	ebx->split.ways_of_associativity = assocs[assoc] - 1;
+	ebx->split.physical_line_partition = lines_per_tag - 1;
+	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
+		(ebx->split.ways_of_associativity + 1) - 1;
+}
 
 
 static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
 {
 {
-	unsigned int		eax, ebx, ecx, edx;
-	union _cpuid4_leaf_eax	cache_eax;
+	union _cpuid4_leaf_eax 	eax;
+	union _cpuid4_leaf_ebx 	ebx;
+	union _cpuid4_leaf_ecx 	ecx;
+	unsigned		edx;
 
 
-	cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
-	cache_eax.full = eax;
-	if (cache_eax.split.type == CACHE_TYPE_NULL)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		amd_cpuid4(index, &eax, &ebx, &ecx);
+	else
+		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
+	if (eax.split.type == CACHE_TYPE_NULL)
 		return -EIO; /* better error ? */
 		return -EIO; /* better error ? */
 
 
-	this_leaf->eax.full = eax;
-	this_leaf->ebx.full = ebx;
-	this_leaf->ecx.full = ecx;
-	this_leaf->size = (this_leaf->ecx.split.number_of_sets + 1) *
-		(this_leaf->ebx.split.coherency_line_size + 1) *
-		(this_leaf->ebx.split.physical_line_partition + 1) *
-		(this_leaf->ebx.split.ways_of_associativity + 1);
+	this_leaf->eax = eax;
+	this_leaf->ebx = ebx;
+	this_leaf->ecx = ecx;
+	this_leaf->size = (ecx.split.number_of_sets + 1) *
+		(ebx.split.coherency_line_size + 1) *
+		(ebx.split.physical_line_partition + 1) *
+		(ebx.split.ways_of_associativity + 1);
 	return 0;
 	return 0;
 }
 }
 
 

+ 1 - 6
arch/i386/kernel/crash.c

@@ -120,14 +120,9 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 	return 1;
 }
 
-/*
- * By using the NMI code instead of a vector we just sneak thru the
- * word generator coming out with just what we want.  AND it does
- * not matter if clustered_apic_mode is set or not.
- */
 static void smp_send_nmi_allbutself(void)
 {
-	send_IPI_allbutself(APIC_DM_NMI);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 
 static void nmi_shootdown_cpus(void)

+ 252 - 11
arch/i386/kernel/entry.S

@@ -48,6 +48,7 @@
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/page.h>
 #include <asm/page.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
+#include <asm/dwarf2.h>
 #include "irq_vectors.h"
 #include "irq_vectors.h"
 
 
 #define nr_syscalls ((syscall_table_size)/4)
 #define nr_syscalls ((syscall_table_size)/4)
@@ -85,31 +86,67 @@ VM_MASK		= 0x00020000
 #define SAVE_ALL \
 #define SAVE_ALL \
 	cld; \
 	cld; \
 	pushl %es; \
 	pushl %es; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	/*CFI_REL_OFFSET es, 0;*/\
 	pushl %ds; \
 	pushl %ds; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	/*CFI_REL_OFFSET ds, 0;*/\
 	pushl %eax; \
 	pushl %eax; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET eax, 0;\
 	pushl %ebp; \
 	pushl %ebp; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET ebp, 0;\
 	pushl %edi; \
 	pushl %edi; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET edi, 0;\
 	pushl %esi; \
 	pushl %esi; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET esi, 0;\
 	pushl %edx; \
 	pushl %edx; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET edx, 0;\
 	pushl %ecx; \
 	pushl %ecx; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET ecx, 0;\
 	pushl %ebx; \
 	pushl %ebx; \
+	CFI_ADJUST_CFA_OFFSET 4;\
+	CFI_REL_OFFSET ebx, 0;\
 	movl $(__USER_DS), %edx; \
 	movl $(__USER_DS), %edx; \
 	movl %edx, %ds; \
 	movl %edx, %ds; \
 	movl %edx, %es;
 	movl %edx, %es;
 
 
 #define RESTORE_INT_REGS \
 #define RESTORE_INT_REGS \
 	popl %ebx;	\
 	popl %ebx;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE ebx;\
 	popl %ecx;	\
 	popl %ecx;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE ecx;\
 	popl %edx;	\
 	popl %edx;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE edx;\
 	popl %esi;	\
 	popl %esi;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE esi;\
 	popl %edi;	\
 	popl %edi;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE edi;\
 	popl %ebp;	\
 	popl %ebp;	\
-	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE ebp;\
+	popl %eax;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	CFI_RESTORE eax
 
 
 #define RESTORE_REGS	\
 #define RESTORE_REGS	\
 	RESTORE_INT_REGS; \
 	RESTORE_INT_REGS; \
 1:	popl %ds;	\
 1:	popl %ds;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	/*CFI_RESTORE ds;*/\
 2:	popl %es;	\
 2:	popl %es;	\
+	CFI_ADJUST_CFA_OFFSET -4;\
+	/*CFI_RESTORE es;*/\
 .section .fixup,"ax";	\
 .section .fixup,"ax";	\
 3:	movl $0,(%esp);	\
 3:	movl $0,(%esp);	\
 	jmp 1b;		\
 	jmp 1b;		\
@@ -122,13 +159,43 @@ VM_MASK		= 0x00020000
 	.long 2b,4b;	\
 	.long 2b,4b;	\
 .previous
 .previous
 
 
+#define RING0_INT_FRAME \
+	CFI_STARTPROC simple;\
+	CFI_DEF_CFA esp, 3*4;\
+	/*CFI_OFFSET cs, -2*4;*/\
+	CFI_OFFSET eip, -3*4
+
+#define RING0_EC_FRAME \
+	CFI_STARTPROC simple;\
+	CFI_DEF_CFA esp, 4*4;\
+	/*CFI_OFFSET cs, -2*4;*/\
+	CFI_OFFSET eip, -3*4
+
+#define RING0_PTREGS_FRAME \
+	CFI_STARTPROC simple;\
+	CFI_DEF_CFA esp, OLDESP-EBX;\
+	/*CFI_OFFSET cs, CS-OLDESP;*/\
+	CFI_OFFSET eip, EIP-OLDESP;\
+	/*CFI_OFFSET es, ES-OLDESP;*/\
+	/*CFI_OFFSET ds, DS-OLDESP;*/\
+	CFI_OFFSET eax, EAX-OLDESP;\
+	CFI_OFFSET ebp, EBP-OLDESP;\
+	CFI_OFFSET edi, EDI-OLDESP;\
+	CFI_OFFSET esi, ESI-OLDESP;\
+	CFI_OFFSET edx, EDX-OLDESP;\
+	CFI_OFFSET ecx, ECX-OLDESP;\
+	CFI_OFFSET ebx, EBX-OLDESP
 
 
 ENTRY(ret_from_fork)
 ENTRY(ret_from_fork)
+	CFI_STARTPROC
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET -4
 	call schedule_tail
 	call schedule_tail
 	GET_THREAD_INFO(%ebp)
 	GET_THREAD_INFO(%ebp)
 	popl %eax
 	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
 	jmp syscall_exit
 	jmp syscall_exit
+	CFI_ENDPROC
 
 
 /*
 /*
  * Return to user mode is not as complex as all this looks,
  * Return to user mode is not as complex as all this looks,
@@ -139,6 +206,7 @@ ENTRY(ret_from_fork)
 
 
 	# userspace resumption stub bypassing syscall exit tracing
 	# userspace resumption stub bypassing syscall exit tracing
 	ALIGN
 	ALIGN
+	RING0_PTREGS_FRAME
 ret_from_exception:
 ret_from_exception:
 	preempt_stop
 	preempt_stop
 ret_from_intr:
 ret_from_intr:
@@ -171,20 +239,33 @@ need_resched:
 	call preempt_schedule_irq
 	call preempt_schedule_irq
 	jmp need_resched
 	jmp need_resched
 #endif
 #endif
+	CFI_ENDPROC
 
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
 
 
 	# sysenter call handler stub
 	# sysenter call handler stub
 ENTRY(sysenter_entry)
 ENTRY(sysenter_entry)
+	CFI_STARTPROC simple
+	CFI_DEF_CFA esp, 0
+	CFI_REGISTER esp, ebp
 	movl TSS_sysenter_esp0(%esp),%esp
 	movl TSS_sysenter_esp0(%esp),%esp
 sysenter_past_esp:
 sysenter_past_esp:
 	sti
 	sti
 	pushl $(__USER_DS)
 	pushl $(__USER_DS)
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET ss, 0*/
 	pushl %ebp
 	pushl %ebp
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET esp, 0
 	pushfl
 	pushfl
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $(__USER_CS)
 	pushl $(__USER_CS)
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET cs, 0*/
 	pushl $SYSENTER_RETURN
 	pushl $SYSENTER_RETURN
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET eip, 0
 
 
 /*
 /*
  * Load the potential sixth argument from user stack.
  * Load the potential sixth argument from user stack.
@@ -199,6 +280,7 @@ sysenter_past_esp:
 .previous
 .previous
 
 
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 	GET_THREAD_INFO(%ebp)
 
 
@@ -219,11 +301,14 @@ sysenter_past_esp:
 	xorl %ebp,%ebp
 	xorl %ebp,%ebp
 	sti
 	sti
 	sysexit
 	sysexit
+	CFI_ENDPROC
 
 
 
 
 	# system call handler stub
 	# system call handler stub
 ENTRY(system_call)
 ENTRY(system_call)
+	RING0_INT_FRAME			# can't unwind into user space anyway
 	pushl %eax			# save orig_eax
 	pushl %eax			# save orig_eax
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 	GET_THREAD_INFO(%ebp)
 	testl $TF_MASK,EFLAGS(%esp)
 	testl $TF_MASK,EFLAGS(%esp)
@@ -256,10 +341,12 @@ restore_all:
 	movb CS(%esp), %al
 	movb CS(%esp), %al
 	andl $(VM_MASK | (4 << 8) | 3), %eax
 	andl $(VM_MASK | (4 << 8) | 3), %eax
 	cmpl $((4 << 8) | 3), %eax
 	cmpl $((4 << 8) | 3), %eax
+	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
 restore_nocheck:
 	RESTORE_REGS
 	RESTORE_REGS
 	addl $4, %esp
 	addl $4, %esp
+	CFI_ADJUST_CFA_OFFSET -4
 1:	iret
 1:	iret
 .section .fixup,"ax"
 .section .fixup,"ax"
 iret_exc:
 iret_exc:
@@ -273,6 +360,7 @@ iret_exc:
 	.long 1b,iret_exc
 	.long 1b,iret_exc
 .previous
 .previous
 
 
+	CFI_RESTORE_STATE
 ldt_ss:
 ldt_ss:
 	larl OLDSS(%esp), %eax
 	larl OLDSS(%esp), %eax
 	jnz restore_nocheck
 	jnz restore_nocheck
@@ -285,11 +373,13 @@ ldt_ss:
 	 * CPUs, which we can try to work around to make
 	 * CPUs, which we can try to work around to make
 	 * dosemu and wine happy. */
 	 * dosemu and wine happy. */
 	subl $8, %esp		# reserve space for switch16 pointer
 	subl $8, %esp		# reserve space for switch16 pointer
+	CFI_ADJUST_CFA_OFFSET 8
 	cli
 	cli
 	movl %esp, %eax
 	movl %esp, %eax
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	 * and a switch16 pointer on top of the current frame. */
 	 * and a switch16 pointer on top of the current frame. */
 	call setup_x86_bogus_stack
 	call setup_x86_bogus_stack
+	CFI_ADJUST_CFA_OFFSET -8	# frame has moved
 	RESTORE_REGS
 	RESTORE_REGS
 	lss 20+4(%esp), %esp	# switch to 16bit stack
 	lss 20+4(%esp), %esp	# switch to 16bit stack
 1:	iret
 1:	iret
@@ -297,9 +387,11 @@ ldt_ss:
 	.align 4
 	.align 4
 	.long 1b,iret_exc
 	.long 1b,iret_exc
 .previous
 .previous
+	CFI_ENDPROC
 
 
 	# perform work that needs to be done immediately before resumption
 	# perform work that needs to be done immediately before resumption
 	ALIGN
 	ALIGN
+	RING0_PTREGS_FRAME		# can't unwind into user space anyway
 work_pending:
 work_pending:
 	testb $_TIF_NEED_RESCHED, %cl
 	testb $_TIF_NEED_RESCHED, %cl
 	jz work_notifysig
 	jz work_notifysig
@@ -329,8 +421,10 @@ work_notifysig:				# deal with pending signals and
 work_notifysig_v86:
 work_notifysig_v86:
 #ifdef CONFIG_VM86
 #ifdef CONFIG_VM86
 	pushl %ecx			# save ti_flags for do_notify_resume
 	pushl %ecx			# save ti_flags for do_notify_resume
+	CFI_ADJUST_CFA_OFFSET 4
 	call save_v86_state		# %eax contains pt_regs pointer
 	call save_v86_state		# %eax contains pt_regs pointer
 	popl %ecx
 	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
 	movl %eax, %esp
 	movl %eax, %esp
 	xorl %edx, %edx
 	xorl %edx, %edx
 	call do_notify_resume
 	call do_notify_resume
@@ -363,19 +457,21 @@ syscall_exit_work:
 	movl $1, %edx
 	movl $1, %edx
 	call do_syscall_trace
 	call do_syscall_trace
 	jmp resume_userspace
 	jmp resume_userspace
+	CFI_ENDPROC
 
 
-	ALIGN
+	RING0_INT_FRAME			# can't unwind into user space anyway
 syscall_fault:
 syscall_fault:
 	pushl %eax			# save orig_eax
 	pushl %eax			# save orig_eax
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
 	GET_THREAD_INFO(%ebp)
 	movl $-EFAULT,EAX(%esp)
 	movl $-EFAULT,EAX(%esp)
 	jmp resume_userspace
 	jmp resume_userspace
 
 
-	ALIGN
 syscall_badsys:
 syscall_badsys:
 	movl $-ENOSYS,EAX(%esp)
 	movl $-ENOSYS,EAX(%esp)
 	jmp resume_userspace
 	jmp resume_userspace
+	CFI_ENDPROC
 
 
 #define FIXUP_ESPFIX_STACK \
 #define FIXUP_ESPFIX_STACK \
 	movl %esp, %eax; \
 	movl %esp, %eax; \
@@ -387,16 +483,21 @@ syscall_badsys:
 	movl %eax, %esp;
 	movl %eax, %esp;
 #define UNWIND_ESPFIX_STACK \
 #define UNWIND_ESPFIX_STACK \
 	pushl %eax; \
 	pushl %eax; \
+	CFI_ADJUST_CFA_OFFSET 4; \
 	movl %ss, %eax; \
 	movl %ss, %eax; \
 	/* see if on 16bit stack */ \
 	/* see if on 16bit stack */ \
 	cmpw $__ESPFIX_SS, %ax; \
 	cmpw $__ESPFIX_SS, %ax; \
-	jne 28f; \
-	movl $__KERNEL_DS, %edx; \
-	movl %edx, %ds; \
-	movl %edx, %es; \
+	je 28f; \
+27:	popl %eax; \
+	CFI_ADJUST_CFA_OFFSET -4; \
+.section .fixup,"ax"; \
+28:	movl $__KERNEL_DS, %eax; \
+	movl %eax, %ds; \
+	movl %eax, %es; \
 	/* switch to 32bit stack */ \
 	/* switch to 32bit stack */ \
-	FIXUP_ESPFIX_STACK \
-28:	popl %eax;
+	FIXUP_ESPFIX_STACK; \
+	jmp 27b; \
+.previous
 
 
 /*
 /*
  * Build the entry stubs and pointer table with
  * Build the entry stubs and pointer table with
@@ -408,9 +509,14 @@ ENTRY(interrupt)
 
 
 vector=0
 vector=0
 ENTRY(irq_entries_start)
 ENTRY(irq_entries_start)
+	RING0_INT_FRAME
 .rept NR_IRQS
 .rept NR_IRQS
 	ALIGN
 	ALIGN
+ .if vector
+	CFI_ADJUST_CFA_OFFSET -4
+ .endif
 1:	pushl $vector-256
 1:	pushl $vector-256
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp common_interrupt
 	jmp common_interrupt
 .data
 .data
 	.long 1b
 	.long 1b
@@ -424,60 +530,99 @@ common_interrupt:
 	movl %esp,%eax
 	movl %esp,%eax
 	call do_IRQ
 	call do_IRQ
 	jmp ret_from_intr
 	jmp ret_from_intr
+	CFI_ENDPROC
 
 
 #define BUILD_INTERRUPT(name, nr)	\
 #define BUILD_INTERRUPT(name, nr)	\
 ENTRY(name)				\
 ENTRY(name)				\
+	RING0_INT_FRAME;		\
 	pushl $nr-256;			\
 	pushl $nr-256;			\
-	SAVE_ALL			\
+	CFI_ADJUST_CFA_OFFSET 4;	\
+	SAVE_ALL;			\
 	movl %esp,%eax;			\
 	movl %esp,%eax;			\
 	call smp_/**/name;		\
 	call smp_/**/name;		\
-	jmp ret_from_intr;
+	jmp ret_from_intr;	\
+	CFI_ENDPROC
 
 
 /* The include is where all of the SMP etc. interrupts come from */
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 #include "entry_arch.h"
 
 
 ENTRY(divide_error)
 ENTRY(divide_error)
+	RING0_INT_FRAME
 	pushl $0			# no error code
 	pushl $0			# no error code
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_divide_error
 	pushl $do_divide_error
+	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 	ALIGN
 error_code:
 error_code:
 	pushl %ds
 	pushl %ds
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET ds, 0*/
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET eax, 0
 	xorl %eax, %eax
 	xorl %eax, %eax
 	pushl %ebp
 	pushl %ebp
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebp, 0
 	pushl %edi
 	pushl %edi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edi, 0
 	pushl %esi
 	pushl %esi
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET esi, 0
 	pushl %edx
 	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx, 0
 	decl %eax			# eax = -1
 	decl %eax			# eax = -1
 	pushl %ecx
 	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx, 0
 	pushl %ebx
 	pushl %ebx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ebx, 0
 	cld
 	cld
 	pushl %es
 	pushl %es
+	CFI_ADJUST_CFA_OFFSET 4
+	/*CFI_REL_OFFSET es, 0*/
 	UNWIND_ESPFIX_STACK
 	UNWIND_ESPFIX_STACK
 	popl %ecx
 	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	/*CFI_REGISTER es, ecx*/
 	movl ES(%esp), %edi		# get the function address
 	movl ES(%esp), %edi		# get the function address
 	movl ORIG_EAX(%esp), %edx	# get the error code
 	movl ORIG_EAX(%esp), %edx	# get the error code
 	movl %eax, ORIG_EAX(%esp)
 	movl %eax, ORIG_EAX(%esp)
 	movl %ecx, ES(%esp)
 	movl %ecx, ES(%esp)
+	/*CFI_REL_OFFSET es, ES*/
 	movl $(__USER_DS), %ecx
 	movl $(__USER_DS), %ecx
 	movl %ecx, %ds
 	movl %ecx, %ds
 	movl %ecx, %es
 	movl %ecx, %es
 	movl %esp,%eax			# pt_regs pointer
 	movl %esp,%eax			# pt_regs pointer
 	call *%edi
 	call *%edi
 	jmp ret_from_exception
 	jmp ret_from_exception
+	CFI_ENDPROC
 
 
 ENTRY(coprocessor_error)
 ENTRY(coprocessor_error)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_coprocessor_error
 	pushl $do_coprocessor_error
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(simd_coprocessor_error)
 ENTRY(simd_coprocessor_error)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_simd_coprocessor_error
 	pushl $do_simd_coprocessor_error
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(device_not_available)
 ENTRY(device_not_available)
+	RING0_INT_FRAME
 	pushl $-1			# mark this as an int
 	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	movl %cr0, %eax
 	movl %cr0, %eax
 	testl $0x4, %eax		# EM (math emulation bit)
 	testl $0x4, %eax		# EM (math emulation bit)
@@ -487,9 +632,12 @@ ENTRY(device_not_available)
 	jmp ret_from_exception
 	jmp ret_from_exception
 device_not_available_emulate:
 device_not_available_emulate:
 	pushl $0			# temporary storage for ORIG_EIP
 	pushl $0			# temporary storage for ORIG_EIP
+	CFI_ADJUST_CFA_OFFSET 4
 	call math_emulate
 	call math_emulate
 	addl $4, %esp
 	addl $4, %esp
+	CFI_ADJUST_CFA_OFFSET -4
 	jmp ret_from_exception
 	jmp ret_from_exception
+	CFI_ENDPROC
 
 
 /*
 /*
  * Debug traps and NMI can happen at the one SYSENTER instruction
  * Debug traps and NMI can happen at the one SYSENTER instruction
@@ -514,16 +662,19 @@ label:						\
 	pushl $sysenter_past_esp
 	pushl $sysenter_past_esp
 
 
 KPROBE_ENTRY(debug)
 KPROBE_ENTRY(debug)
+	RING0_INT_FRAME
 	cmpl $sysenter_entry,(%esp)
 	cmpl $sysenter_entry,(%esp)
 	jne debug_stack_correct
 	jne debug_stack_correct
 	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
 	FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
 debug_stack_correct:
 debug_stack_correct:
 	pushl $-1			# mark this as an int
 	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	xorl %edx,%edx			# error code 0
 	xorl %edx,%edx			# error code 0
 	movl %esp,%eax			# pt_regs pointer
 	movl %esp,%eax			# pt_regs pointer
 	call do_debug
 	call do_debug
 	jmp ret_from_exception
 	jmp ret_from_exception
+	CFI_ENDPROC
 	.previous .text
 	.previous .text
 /*
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * NMI is doubly nasty. It can happen _while_ we're handling
@@ -534,14 +685,18 @@ debug_stack_correct:
  * fault happened on the sysenter path.
  * fault happened on the sysenter path.
  */
  */
 ENTRY(nmi)
 ENTRY(nmi)
+	RING0_INT_FRAME
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	movl %ss, %eax
 	movl %ss, %eax
 	cmpw $__ESPFIX_SS, %ax
 	cmpw $__ESPFIX_SS, %ax
 	popl %eax
 	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
 	je nmi_16bit_stack
 	je nmi_16bit_stack
 	cmpl $sysenter_entry,(%esp)
 	cmpl $sysenter_entry,(%esp)
 	je nmi_stack_fixup
 	je nmi_stack_fixup
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	movl %esp,%eax
 	movl %esp,%eax
 	/* Do not access memory above the end of our stack page,
 	/* Do not access memory above the end of our stack page,
 	 * it might not exist.
 	 * it might not exist.
@@ -549,16 +704,19 @@ ENTRY(nmi)
 	andl $(THREAD_SIZE-1),%eax
 	andl $(THREAD_SIZE-1),%eax
 	cmpl $(THREAD_SIZE-20),%eax
 	cmpl $(THREAD_SIZE-20),%eax
 	popl %eax
 	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
 	jae nmi_stack_correct
 	jae nmi_stack_correct
 	cmpl $sysenter_entry,12(%esp)
 	cmpl $sysenter_entry,12(%esp)
 	je nmi_debug_stack_check
 	je nmi_debug_stack_check
 nmi_stack_correct:
 nmi_stack_correct:
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	xorl %edx,%edx		# zero error code
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
 	call do_nmi
 	jmp restore_all
 	jmp restore_all
+	CFI_ENDPROC
 
 
 nmi_stack_fixup:
 nmi_stack_fixup:
 	FIX_STACK(12,nmi_stack_correct, 1)
 	FIX_STACK(12,nmi_stack_correct, 1)
@@ -574,94 +732,177 @@ nmi_debug_stack_check:
 	jmp nmi_stack_correct
 	jmp nmi_stack_correct
 
 
 nmi_16bit_stack:
 nmi_16bit_stack:
+	RING0_INT_FRAME
 	/* create the pointer to lss back */
 	/* create the pointer to lss back */
 	pushl %ss
 	pushl %ss
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl %esp
 	pushl %esp
+	CFI_ADJUST_CFA_OFFSET 4
 	movzwl %sp, %esp
 	movzwl %sp, %esp
 	addw $4, (%esp)
 	addw $4, (%esp)
 	/* copy the iret frame of 12 bytes */
 	/* copy the iret frame of 12 bytes */
 	.rept 3
 	.rept 3
 	pushl 16(%esp)
 	pushl 16(%esp)
+	CFI_ADJUST_CFA_OFFSET 4
 	.endr
 	.endr
 	pushl %eax
 	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	FIXUP_ESPFIX_STACK		# %eax == %esp
 	FIXUP_ESPFIX_STACK		# %eax == %esp
+	CFI_ADJUST_CFA_OFFSET -20	# the frame has now moved
 	xorl %edx,%edx			# zero error code
 	xorl %edx,%edx			# zero error code
 	call do_nmi
 	call do_nmi
 	RESTORE_REGS
 	RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to 16bit stack
 	lss 12+4(%esp), %esp		# back to 16bit stack
 1:	iret
 1:	iret
+	CFI_ENDPROC
 .section __ex_table,"a"
 .section __ex_table,"a"
 	.align 4
 	.align 4
 	.long 1b,iret_exc
 	.long 1b,iret_exc
 .previous
 .previous
 
 
 KPROBE_ENTRY(int3)
 KPROBE_ENTRY(int3)
+	RING0_INT_FRAME
 	pushl $-1			# mark this as an int
 	pushl $-1			# mark this as an int
+	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
 	xorl %edx,%edx		# zero error code
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	movl %esp,%eax		# pt_regs pointer
 	call do_int3
 	call do_int3
 	jmp ret_from_exception
 	jmp ret_from_exception
+	CFI_ENDPROC
 	.previous .text
 	.previous .text
 
 
 ENTRY(overflow)
 ENTRY(overflow)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_overflow
 	pushl $do_overflow
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(bounds)
 ENTRY(bounds)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_bounds
 	pushl $do_bounds
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(invalid_op)
 ENTRY(invalid_op)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_invalid_op
 	pushl $do_invalid_op
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(coprocessor_segment_overrun)
 ENTRY(coprocessor_segment_overrun)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_coprocessor_segment_overrun
 	pushl $do_coprocessor_segment_overrun
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(invalid_TSS)
 ENTRY(invalid_TSS)
+	RING0_EC_FRAME
 	pushl $do_invalid_TSS
 	pushl $do_invalid_TSS
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(segment_not_present)
 ENTRY(segment_not_present)
+	RING0_EC_FRAME
 	pushl $do_segment_not_present
 	pushl $do_segment_not_present
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 ENTRY(stack_segment)
 ENTRY(stack_segment)
+	RING0_EC_FRAME
 	pushl $do_stack_segment
 	pushl $do_stack_segment
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 KPROBE_ENTRY(general_protection)
 KPROBE_ENTRY(general_protection)
+	RING0_EC_FRAME
 	pushl $do_general_protection
 	pushl $do_general_protection
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 	.previous .text
 	.previous .text
 
 
 ENTRY(alignment_check)
 ENTRY(alignment_check)
+	RING0_EC_FRAME
 	pushl $do_alignment_check
 	pushl $do_alignment_check
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 
 
 KPROBE_ENTRY(page_fault)
 KPROBE_ENTRY(page_fault)
+	RING0_EC_FRAME
 	pushl $do_page_fault
 	pushl $do_page_fault
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 	.previous .text
 	.previous .text
 
 
 #ifdef CONFIG_X86_MCE
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
 ENTRY(machine_check)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl machine_check_vector
 	pushl machine_check_vector
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
 #endif
 #endif
 
 
 ENTRY(spurious_interrupt_bug)
 ENTRY(spurious_interrupt_bug)
+	RING0_INT_FRAME
 	pushl $0
 	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
 	pushl $do_spurious_interrupt_bug
 	pushl $do_spurious_interrupt_bug
+	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
+	CFI_ENDPROC
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+	CFI_STARTPROC
+	movl	4(%esp), %edx
+	movl	(%esp), %ecx
+	leal	4(%esp), %eax
+	movl	%ebx, EBX(%edx)
+	xorl	%ebx, %ebx
+	movl	%ebx, ECX(%edx)
+	movl	%ebx, EDX(%edx)
+	movl	%esi, ESI(%edx)
+	movl	%edi, EDI(%edx)
+	movl	%ebp, EBP(%edx)
+	movl	%ebx, EAX(%edx)
+	movl	$__USER_DS, DS(%edx)
+	movl	$__USER_DS, ES(%edx)
+	movl	%ebx, ORIG_EAX(%edx)
+	movl	%ecx, EIP(%edx)
+	movl	12(%esp), %ecx
+	movl	$__KERNEL_CS, CS(%edx)
+	movl	%ebx, EFLAGS(%edx)
+	movl	%eax, OLDESP(%edx)
+	movl	8(%esp), %eax
+	movl	%ecx, 8(%esp)
+	movl	EBX(%edx), %ebx
+	movl	$__KERNEL_DS, OLDSS(%edx)
+	jmpl	*%eax
+	CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif
 
 
 .section .rodata,"a"
 .section .rodata,"a"
 #include "syscall_table.S"
 #include "syscall_table.S"

+ 28 - 21
arch/i386/kernel/io_apic.c

@@ -38,6 +38,7 @@
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/timer.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
 #include <asm/i8259.h>
+#include <asm/nmi.h>
 
 
 #include <mach_apic.h>
 #include <mach_apic.h>
 
 
@@ -50,6 +51,7 @@ atomic_t irq_mis_count;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 
 int timer_over_8254 __initdata = 1;
 int timer_over_8254 __initdata = 1;
 
 
@@ -1161,10 +1163,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 int assign_irq_vector(int irq)
 int assign_irq_vector(int irq)
 {
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+	unsigned long flags;
+	int vector;
+
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
 
 
-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+	spin_lock_irqsave(&vector_lock, flags);
+
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+		spin_unlock_irqrestore(&vector_lock, flags);
 		return IO_APIC_VECTOR(irq);
 		return IO_APIC_VECTOR(irq);
+	}
 next:
 next:
 	current_vector += 8;
 	current_vector += 8;
 	if (current_vector == SYSCALL_VECTOR)
 	if (current_vector == SYSCALL_VECTOR)
@@ -1172,16 +1181,21 @@ next:
 
 
 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
 		offset++;
 		offset++;
-		if (!(offset%8))
+		if (!(offset%8)) {
+			spin_unlock_irqrestore(&vector_lock, flags);
 			return -ENOSPC;
 			return -ENOSPC;
+		}
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}
 	}
 
 
-	vector_irq[current_vector] = irq;
+	vector = current_vector;
+	vector_irq[vector] = irq;
 	if (irq != AUTO_ASSIGN)
 	if (irq != AUTO_ASSIGN)
-		IO_APIC_VECTOR(irq) = current_vector;
+		IO_APIC_VECTOR(irq) = vector;
 
 
-	return current_vector;
+	spin_unlock_irqrestore(&vector_lock, flags);
+
+	return vector;
 }
 }
 
 
 static struct hw_interrupt_type ioapic_level_type;
 static struct hw_interrupt_type ioapic_level_type;
@@ -1193,21 +1207,14 @@ static struct hw_interrupt_type ioapic_edge_type;
 
 
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
 {
-	if (use_pci_vector() && !platform_legacy_irq(irq)) {
-		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-				trigger == IOAPIC_LEVEL)
-			irq_desc[vector].handler = &ioapic_level_type;
-		else
-			irq_desc[vector].handler = &ioapic_edge_type;
-		set_intr_gate(vector, interrupt[vector]);
-	} else	{
-		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-				trigger == IOAPIC_LEVEL)
-			irq_desc[irq].handler = &ioapic_level_type;
-		else
-			irq_desc[irq].handler = &ioapic_edge_type;
-		set_intr_gate(vector, interrupt[irq]);
-	}
+	unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+			trigger == IOAPIC_LEVEL)
+		irq_desc[idx].handler = &ioapic_level_type;
+	else
+		irq_desc[idx].handler = &ioapic_edge_type;
+	set_intr_gate(vector, interrupt[idx]);
 }
 }
 
 
 static void __init setup_IO_APIC_irqs(void)
 static void __init setup_IO_APIC_irqs(void)

+ 1 - 1
arch/i386/kernel/irq.c

@@ -227,7 +227,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (i == 0) {
 	if (i == 0) {
 		seq_printf(p, "           ");
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
 		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
+			seq_printf(p, "CPU%-8d",j);
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
 	}
 	}
 
 

+ 65 - 7
arch/i386/kernel/nmi.c

@@ -14,21 +14,17 @@
  */
  */
 
 
 #include <linux/config.h>
 #include <linux/config.h>
-#include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
 #include <linux/nmi.h>
 #include <linux/sysdev.h>
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
 #include <linux/sysctl.h>
+#include <linux/percpu.h>
 
 
 #include <asm/smp.h>
 #include <asm/smp.h>
-#include <asm/div64.h>
 #include <asm/nmi.h>
 #include <asm/nmi.h>
+#include <asm/intel_arch_perfmon.h>
 
 
 #include "mach_traps.h"
 #include "mach_traps.h"
 
 
@@ -100,6 +96,9 @@ int nmi_active;
 	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
 	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
 	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
 
 
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
  * the CPU is idle. To make sure the NMI watchdog really ticks on all
@@ -212,6 +211,8 @@ static int __init setup_nmi_watchdog(char *str)
 
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 
+static void disable_intel_arch_watchdog(void);
+
 static void disable_lapic_nmi_watchdog(void)
 static void disable_lapic_nmi_watchdog(void)
 {
 {
 	if (nmi_active <= 0)
 	if (nmi_active <= 0)
@@ -221,6 +222,10 @@ static void disable_lapic_nmi_watchdog(void)
 		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
 		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
 		break;
 		break;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			disable_intel_arch_watchdog();
+			break;
+		}
 		switch (boot_cpu_data.x86) {
 		switch (boot_cpu_data.x86) {
 		case 6:
 		case 6:
 			if (boot_cpu_data.x86_model > 0xd)
 			if (boot_cpu_data.x86_model > 0xd)
@@ -449,6 +454,53 @@ static int setup_p4_watchdog(void)
 	return 1;
 	return 1;
 }
 }
 
 
+static void disable_intel_arch_watchdog(void)
+{
+	unsigned ebx;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 */
+	ebx = cpuid_ebx(10);
+	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+	unsigned int evntsel;
+	unsigned ebx;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 */
+	ebx = cpuid_ebx(10);
+	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return 0;
+
+	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+	evntsel = ARCH_PERFMON_EVENTSEL_INT
+		| ARCH_PERFMON_EVENTSEL_OS
+		| ARCH_PERFMON_EVENTSEL_USR
+		| ARCH_PERFMON_NMI_EVENT_SEL
+		| ARCH_PERFMON_NMI_EVENT_UMASK;
+
+	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	return 1;
+}
+
 void setup_apic_nmi_watchdog (void)
 void setup_apic_nmi_watchdog (void)
 {
 {
 	switch (boot_cpu_data.x86_vendor) {
 	switch (boot_cpu_data.x86_vendor) {
@@ -458,6 +510,11 @@ void setup_apic_nmi_watchdog (void)
 		setup_k7_watchdog();
 		setup_k7_watchdog();
 		break;
 		break;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			if (!setup_intel_arch_watchdog())
+				return;
+			break;
+		}
 		switch (boot_cpu_data.x86) {
 		switch (boot_cpu_data.x86) {
 		case 6:
 		case 6:
 			if (boot_cpu_data.x86_model > 0xd)
 			if (boot_cpu_data.x86_model > 0xd)
@@ -561,7 +618,8 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
 			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
 			apic_write(APIC_LVTPC, APIC_DM_NMI);
 			apic_write(APIC_LVTPC, APIC_DM_NMI);
 		}
 		}
-		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0) {
+		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
+		         nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
 			/* Only P6 based Pentium M need to re-unmask
 			/* Only P6 based Pentium M need to re-unmask
 			 * the apic vector but it doesn't hurt
 			 * the apic vector but it doesn't hurt
 			 * other P6 variant */
 			 * other P6 variant */

+ 4 - 4
arch/i386/kernel/process.c

@@ -102,7 +102,7 @@ void default_idle(void)
 	local_irq_enable();
 	local_irq_enable();
 
 
 	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
 	if (!hlt_counter && boot_cpu_data.hlt_works_ok) {
-		clear_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status &= ~TS_POLLING;
 		smp_mb__after_clear_bit();
 		smp_mb__after_clear_bit();
 		while (!need_resched()) {
 		while (!need_resched()) {
 			local_irq_disable();
 			local_irq_disable();
@@ -111,7 +111,7 @@ void default_idle(void)
 			else
 			else
 				local_irq_enable();
 				local_irq_enable();
 		}
 		}
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 	} else {
 	} else {
 		while (!need_resched())
 		while (!need_resched())
 			cpu_relax();
 			cpu_relax();
@@ -174,7 +174,7 @@ void cpu_idle(void)
 {
 {
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
 
 
-	set_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status |= TS_POLLING;
 
 
 	/* endless idle loop with no priority at all */
 	/* endless idle loop with no priority at all */
 	while (1) {
 	while (1) {
@@ -312,7 +312,7 @@ void show_regs(struct pt_regs * regs)
 	cr3 = read_cr3();
 	cr3 = read_cr3();
 	cr4 = read_cr4_safe();
 	cr4 = read_cr4_safe();
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
 	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
-	show_trace(NULL, &regs->esp);
+	show_trace(NULL, regs, &regs->esp);
 }
 }
 
 
 /*
 /*

+ 11 - 1
arch/i386/kernel/smp.c

@@ -114,7 +114,17 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_m
 
 
 static inline int __prepare_ICR (unsigned int shortcut, int vector)
 static inline int __prepare_ICR (unsigned int shortcut, int vector)
 {
 {
-	return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+	unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+	switch (vector) {
+	default:
+		icr |= APIC_DM_FIXED | vector;
+		break;
+	case NMI_VECTOR:
+		icr |= APIC_DM_NMI;
+		break;
+	}
+	return icr;
 }
 }
 
 
 static inline int __prepare_ICR2 (unsigned int mask)
 static inline int __prepare_ICR2 (unsigned int mask)

+ 1 - 0
arch/i386/kernel/smpboot.c

@@ -52,6 +52,7 @@
 #include <asm/tlbflush.h>
 #include <asm/tlbflush.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/arch_hooks.h>
 #include <asm/arch_hooks.h>
+#include <asm/nmi.h>
 
 
 #include <mach_apic.h>
 #include <mach_apic.h>
 #include <mach_wakecpu.h>
 #include <mach_wakecpu.h>

+ 60 - 10
arch/i386/kernel/traps.c

@@ -28,6 +28,7 @@
 #include <linux/utsname.h>
 #include <linux/utsname.h>
 #include <linux/kprobes.h>
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
 #include <linux/kexec.h>
+#include <linux/unwind.h>
 
 
 #ifdef CONFIG_EISA
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
 #include <linux/ioport.h>
@@ -47,7 +48,7 @@
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/i387.h>
 #include <asm/nmi.h>
 #include <asm/nmi.h>
-
+#include <asm/unwind.h>
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
 #include <asm/arch_hooks.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
@@ -92,6 +93,7 @@ asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void machine_check(void);
 asmlinkage void machine_check(void);
 
 
 static int kstack_depth_to_print = 24;
 static int kstack_depth_to_print = 24;
+static int call_trace = 1;
 ATOMIC_NOTIFIER_HEAD(i386die_chain);
 ATOMIC_NOTIFIER_HEAD(i386die_chain);
 
 
 int register_die_notifier(struct notifier_block *nb)
 int register_die_notifier(struct notifier_block *nb)
@@ -170,7 +172,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	return ebp;
 	return ebp;
 }
 }
 
 
-static void show_trace_log_lvl(struct task_struct *task,
+static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+{
+	int n = 0;
+	int printed = 0; /* nr of entries already printed on current line */
+
+	while (unwind(info) == 0 && UNW_PC(info)) {
+		++n;
+		printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed);
+		if (arch_unw_user_mode(info))
+			break;
+	}
+	if (printed)
+		printk("\n");
+	return n;
+}
+
+static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 			       unsigned long *stack, char *log_lvl)
 			       unsigned long *stack, char *log_lvl)
 {
 {
 	unsigned long ebp;
 	unsigned long ebp;
@@ -178,6 +196,26 @@ static void show_trace_log_lvl(struct task_struct *task,
 	if (!task)
 	if (!task)
 		task = current;
 		task = current;
 
 
+	if (call_trace >= 0) {
+		int unw_ret = 0;
+		struct unwind_frame_info info;
+
+		if (regs) {
+			if (unwind_init_frame_info(&info, task, regs) == 0)
+				unw_ret = show_trace_unwind(&info, log_lvl);
+		} else if (task == current)
+			unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+		else {
+			if (unwind_init_blocked(&info, task) == 0)
+				unw_ret = show_trace_unwind(&info, log_lvl);
+		}
+		if (unw_ret > 0) {
+			if (call_trace > 0)
+				return;
+			printk("%sLegacy call trace:\n", log_lvl);
+		}
+	}
+
 	if (task == current) {
 	if (task == current) {
 		/* Grab ebp right from our regs */
 		/* Grab ebp right from our regs */
 		asm ("movl %%ebp, %0" : "=r" (ebp) : );
 		asm ("movl %%ebp, %0" : "=r" (ebp) : );
@@ -198,13 +236,13 @@ static void show_trace_log_lvl(struct task_struct *task,
 	}
 	}
 }
 }
 
 
-void show_trace(struct task_struct *task, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
 {
 {
-	show_trace_log_lvl(task, stack, "");
+	show_trace_log_lvl(task, regs, stack, "");
 }
 }
 
 
-static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
-			       char *log_lvl)
+static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			       unsigned long *esp, char *log_lvl)
 {
 {
 	unsigned long *stack;
 	unsigned long *stack;
 	int i;
 	int i;
@@ -225,13 +263,13 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp,
 		printk("%08lx ", *stack++);
 		printk("%08lx ", *stack++);
 	}
 	}
 	printk("\n%sCall Trace:\n", log_lvl);
 	printk("\n%sCall Trace:\n", log_lvl);
-	show_trace_log_lvl(task, esp, log_lvl);
+	show_trace_log_lvl(task, regs, esp, log_lvl);
 }
 }
 
 
 void show_stack(struct task_struct *task, unsigned long *esp)
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
 {
 	printk("       ");
 	printk("       ");
-	show_stack_log_lvl(task, esp, "");
+	show_stack_log_lvl(task, NULL, esp, "");
 }
 }
 
 
 /*
 /*
@@ -241,7 +279,7 @@ void dump_stack(void)
 {
 {
 	unsigned long stack;
 	unsigned long stack;
 
 
-	show_trace(current, &stack);
+	show_trace(current, NULL, &stack);
 }
 }
 
 
 EXPORT_SYMBOL(dump_stack);
 EXPORT_SYMBOL(dump_stack);
@@ -285,7 +323,7 @@ void show_registers(struct pt_regs *regs)
 		u8 __user *eip;
 		u8 __user *eip;
 
 
 		printk("\n" KERN_EMERG "Stack: ");
 		printk("\n" KERN_EMERG "Stack: ");
-		show_stack_log_lvl(NULL, (unsigned long *)esp, KERN_EMERG);
+		show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
 
 
 		printk(KERN_EMERG "Code: ");
 		printk(KERN_EMERG "Code: ");
 
 
@@ -1215,3 +1253,15 @@ static int __init kstack_setup(char *s)
 	return 1;
 	return 1;
 }
 }
 __setup("kstack=", kstack_setup);
 __setup("kstack=", kstack_setup);
+
+static int __init call_trace_setup(char *s)
+{
+	if (strcmp(s, "old") == 0)
+		call_trace = -1;
+	else if (strcmp(s, "both") == 0)
+		call_trace = 0;
+	else if (strcmp(s, "new") == 0)
+		call_trace = 1;
+	return 1;
+}
+__setup("call_trace=", call_trace_setup);

+ 9 - 0
arch/i386/kernel/vmlinux.lds.S

@@ -71,6 +71,15 @@ SECTIONS
   .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
   .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { *(.data.read_mostly) }
   _edata = .;			/* End of data section */
   _edata = .;			/* End of data section */
 
 
+#ifdef CONFIG_STACK_UNWIND
+  . = ALIGN(4);
+  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+	__start_unwind = .;
+  	*(.eh_frame)
+	__end_unwind = .;
+  }
+#endif
+
   . = ALIGN(THREAD_SIZE);	/* init_task */
   . = ALIGN(THREAD_SIZE);	/* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
 	*(.data.init_task)
 	*(.data.init_task)

+ 1 - 0
arch/i386/oprofile/op_model_athlon.c

@@ -13,6 +13,7 @@
 #include <linux/oprofile.h>
 #include <linux/oprofile.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/msr.h>
+#include <asm/nmi.h>
  
  
 #include "op_x86_model.h"
 #include "op_x86_model.h"
 #include "op_counter.h"
 #include "op_counter.h"

+ 1 - 0
arch/i386/oprofile/op_model_p4.c

@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 
 #include "op_x86_model.h"
 #include "op_x86_model.h"
 #include "op_counter.h"
 #include "op_counter.h"

+ 1 - 0
arch/i386/oprofile/op_model_ppro.c

@@ -14,6 +14,7 @@
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
  
  
 #include "op_x86_model.h"
 #include "op_x86_model.h"
 #include "op_counter.h"
 #include "op_counter.h"

+ 2 - 2
arch/ia64/kernel/process.c

@@ -272,9 +272,9 @@ cpu_idle (void)
 	/* endless idle loop with no priority at all */
 	/* endless idle loop with no priority at all */
 	while (1) {
 	while (1) {
 		if (can_do_pal_halt)
 		if (can_do_pal_halt)
-			clear_thread_flag(TIF_POLLING_NRFLAG);
+			current_thread_info()->status &= ~TS_POLLING;
 		else
 		else
-			set_thread_flag(TIF_POLLING_NRFLAG);
+			current_thread_info()->status |= TS_POLLING;
 
 
 		if (!need_resched()) {
 		if (!need_resched()) {
 			void (*idle)(void);
 			void (*idle)(void);

+ 38 - 13
arch/x86_64/Kconfig

@@ -386,24 +386,45 @@ config HPET_EMULATE_RTC
 	bool "Provide RTC interrupt"
 	bool "Provide RTC interrupt"
 	depends on HPET_TIMER && RTC=y
 	depends on HPET_TIMER && RTC=y
 
 
-config GART_IOMMU
-	bool "K8 GART IOMMU support"
+# Mark as embedded because too many people got it wrong.
+# The code disables itself when not needed.
+config IOMMU
+	bool "IOMMU support" if EMBEDDED
 	default y
 	default y
 	select SWIOTLB
 	select SWIOTLB
 	select AGP
 	select AGP
 	depends on PCI
 	depends on PCI
 	help
 	help
-	  Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
-	  and for the bounce buffering software IOMMU.
-	  Needed to run systems with more than 3GB of memory properly with
-	  32-bit PCI devices that do not support DAC (Double Address Cycle).
-	  The IOMMU can be turned off at runtime with the iommu=off parameter.
-  	  Normally the kernel will take the right choice by itself.
-  	  This option includes a driver for the AMD Opteron/Athlon64 IOMMU
-  	  northbridge and a software emulation used on other systems without
-	  hardware IOMMU.  If unsure, say Y.
-
-# need this always selected by GART_IOMMU for the VIA workaround
+	  Support for full DMA access of devices with 32bit memory access only
+	  on systems with more than 3GB. This is usually needed for USB,
+	  sound, many IDE/SATA chipsets and some other devices.
+	  Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART
+	  based IOMMU and a software bounce buffer based IOMMU used on Intel
+	  systems and as fallback.
+	  The code is only active when needed (enough memory and limited
+	  device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified
+	  too.
+
+config CALGARY_IOMMU
+	bool "IBM Calgary IOMMU support"
+	default y
+	select SWIOTLB
+	depends on PCI && EXPERIMENTAL
+	help
+	  Support for hardware IOMMUs in IBM's xSeries x366 and x460
+	  systems. Needed to run systems with more than 3GB of memory
+	  properly with 32-bit PCI devices that do not support DAC
+	  (Double Address Cycle). Calgary also supports bus level
+	  isolation, where all DMAs pass through the IOMMU.  This
+	  prevents them from going anywhere except their intended
+	  destination. This catches hard-to-find kernel bugs and
+	  mis-behaving drivers and devices that do not use the DMA-API
+	  properly to set up their DMA buffers.  The IOMMU can be
+	  turned off at boot time with the iommu=off parameter.
+	  Normally the kernel will make the right choice by itself.
+	  If unsure, say Y.
+
+# need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 config SWIOTLB
 	bool
 	bool
 
 
@@ -501,6 +522,10 @@ config REORDER
          optimal TLB usage. If you have pretty much any version of binutils, 
          optimal TLB usage. If you have pretty much any version of binutils, 
 	 this can increase your kernel build time by roughly one minute.
 	 this can increase your kernel build time by roughly one minute.
 
 
+config K8_NB
+	def_bool y
+	depends on AGP_AMD64 || IOMMU || (PCI && NUMA)
+
 endmenu
 endmenu
 
 
 #
 #

+ 17 - 1
arch/x86_64/Kconfig.debug

@@ -13,7 +13,7 @@ config DEBUG_RODATA
 	 If in doubt, say "N".
 	 If in doubt, say "N".
 
 
 config IOMMU_DEBUG
 config IOMMU_DEBUG
-       depends on GART_IOMMU && DEBUG_KERNEL
+       depends on IOMMU && DEBUG_KERNEL
        bool "Enable IOMMU debugging"
        bool "Enable IOMMU debugging"
        help
        help
          Force the IOMMU to on even when you have less than 4GB of
          Force the IOMMU to on even when you have less than 4GB of
@@ -35,6 +35,22 @@ config IOMMU_LEAK
          Add a simple leak tracer to the IOMMU code. This is useful when you
          Add a simple leak tracer to the IOMMU code. This is useful when you
 	 are debugging a buggy device driver that leaks IOMMU mappings.
 	 are debugging a buggy device driver that leaks IOMMU mappings.
 
 
+config DEBUG_STACKOVERFLOW
+        bool "Check for stack overflows"
+        depends on DEBUG_KERNEL
+        help
+	  This option will cause messages to be printed if free stack space
+	  drops below a certain limit.
+
+config DEBUG_STACK_USAGE
+        bool "Stack utilization instrumentation"
+        depends on DEBUG_KERNEL
+        help
+	  Enables the display of the minimum amount of free stack which each
+	  task has ever had available in the sysrq-T and sysrq-P debug output.
+
+	  This option will slow down process creation somewhat.
+
 #config X86_REMOTE_DEBUG
 #config X86_REMOTE_DEBUG
 #       bool "kgdb debugging stub"
 #       bool "kgdb debugging stub"
 
 

+ 3 - 1
arch/x86_64/Makefile

@@ -27,6 +27,7 @@ LDFLAGS_vmlinux :=
 CHECKFLAGS      += -D__x86_64__ -m64
 CHECKFLAGS      += -D__x86_64__ -m64
 
 
 cflags-y	:=
 cflags-y	:=
+cflags-kernel-y	:=
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
@@ -35,7 +36,7 @@ cflags-y += -m64
 cflags-y += -mno-red-zone
 cflags-y += -mno-red-zone
 cflags-y += -mcmodel=kernel
 cflags-y += -mcmodel=kernel
 cflags-y += -pipe
 cflags-y += -pipe
-cflags-$(CONFIG_REORDER) += -ffunction-sections
+cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections
 # this makes reading assembly source easier, but produces worse code
 # this makes reading assembly source easier, but produces worse code
 # actually it makes the kernel smaller too.
 # actually it makes the kernel smaller too.
 cflags-y += -fno-reorder-blocks
 cflags-y += -fno-reorder-blocks
@@ -55,6 +56,7 @@ cflags-y += $(call cc-option,-funit-at-a-time)
 cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 
 
 CFLAGS += $(cflags-y)
 CFLAGS += $(cflags-y)
+CFLAGS_KERNEL += $(cflags-kernel-y)
 AFLAGS += -m64
 AFLAGS += -m64
 
 
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o

+ 7 - 2
arch/x86_64/boot/Makefile

@@ -107,8 +107,13 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
 isoimage: $(BOOTIMAGE)
 isoimage: $(BOOTIMAGE)
 	-rm -rf $(obj)/isoimage
 	-rm -rf $(obj)/isoimage
 	mkdir $(obj)/isoimage
 	mkdir $(obj)/isoimage
-	cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \
-		$(obj)/isoimage
+	for i in lib lib64 share end ; do \
+		if [ -f /usr/$$i/syslinux/isolinux.bin ] ; then \
+			cp /usr/$$i/syslinux/isolinux.bin $(obj)/isoimage ; \
+			break ; \
+		fi ; \
+		if [ $$i = end ] ; then exit 1 ; fi ; \
+	done
 	cp $(BOOTIMAGE) $(obj)/isoimage/linux
 	cp $(BOOTIMAGE) $(obj)/isoimage/linux
 	echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
 	echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg
 	if [ -f '$(FDINITRD)' ] ; then \
 	if [ -f '$(FDINITRD)' ] ; then \

+ 23 - 23
arch/x86_64/boot/compressed/misc.c

@@ -77,11 +77,11 @@ static void gzip_release(void **);
  */
  */
 static unsigned char *real_mode; /* Pointer to real-mode data */
 static unsigned char *real_mode; /* Pointer to real-mode data */
 
 
-#define EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
+#define RM_EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
 #ifndef STANDARD_MEMORY_BIOS_CALL
 #ifndef STANDARD_MEMORY_BIOS_CALL
-#define ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
+#define RM_ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
 #endif
 #endif
-#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
 
 
 extern unsigned char input_data[];
 extern unsigned char input_data[];
 extern int input_len;
 extern int input_len;
@@ -92,9 +92,9 @@ static unsigned long output_ptr = 0;
 
 
 static void *malloc(int size);
 static void *malloc(int size);
 static void free(void *where);
 static void free(void *where);
- 
-void* memset(void* s, int c, unsigned n);
-void* memcpy(void* dest, const void* src, unsigned n);
+
+static void *memset(void *s, int c, unsigned n);
+static void *memcpy(void *dest, const void *src, unsigned n);
 
 
 static void putstr(const char *);
 static void putstr(const char *);
 
 
@@ -162,8 +162,8 @@ static void putstr(const char *s)
 	int x,y,pos;
 	int x,y,pos;
 	char c;
 	char c;
 
 
-	x = SCREEN_INFO.orig_x;
-	y = SCREEN_INFO.orig_y;
+	x = RM_SCREEN_INFO.orig_x;
+	y = RM_SCREEN_INFO.orig_y;
 
 
 	while ( ( c = *s++ ) != '\0' ) {
 	while ( ( c = *s++ ) != '\0' ) {
 		if ( c == '\n' ) {
 		if ( c == '\n' ) {
@@ -184,8 +184,8 @@ static void putstr(const char *s)
 		}
 		}
 	}
 	}
 
 
-	SCREEN_INFO.orig_x = x;
-	SCREEN_INFO.orig_y = y;
+	RM_SCREEN_INFO.orig_x = x;
+	RM_SCREEN_INFO.orig_y = y;
 
 
 	pos = (x + cols * y) * 2;	/* Update cursor position */
 	pos = (x + cols * y) * 2;	/* Update cursor position */
 	outb_p(14, vidport);
 	outb_p(14, vidport);
@@ -194,7 +194,7 @@ static void putstr(const char *s)
 	outb_p(0xff & (pos >> 1), vidport+1);
 	outb_p(0xff & (pos >> 1), vidport+1);
 }
 }
 
 
-void* memset(void* s, int c, unsigned n)
+static void* memset(void* s, int c, unsigned n)
 {
 {
 	int i;
 	int i;
 	char *ss = (char*)s;
 	char *ss = (char*)s;
@@ -203,7 +203,7 @@ void* memset(void* s, int c, unsigned n)
 	return s;
 	return s;
 }
 }
 
 
-void* memcpy(void* dest, const void* src, unsigned n)
+static void* memcpy(void* dest, const void* src, unsigned n)
 {
 {
 	int i;
 	int i;
 	char *d = (char *)dest, *s = (char *)src;
 	char *d = (char *)dest, *s = (char *)src;
@@ -278,15 +278,15 @@ static void error(char *x)
 	putstr(x);
 	putstr(x);
 	putstr("\n\n -- System halted");
 	putstr("\n\n -- System halted");
 
 
-	while(1);
+	while(1);	/* Halt */
 }
 }
 
 
-void setup_normal_output_buffer(void)
+static void setup_normal_output_buffer(void)
 {
 {
 #ifdef STANDARD_MEMORY_BIOS_CALL
 #ifdef STANDARD_MEMORY_BIOS_CALL
-	if (EXT_MEM_K < 1024) error("Less than 2MB of memory");
+	if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory");
 #else
 #else
-	if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory");
+	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory");
 #endif
 #endif
 	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
 	output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */
 	free_mem_end_ptr = (long)real_mode;
 	free_mem_end_ptr = (long)real_mode;
@@ -297,13 +297,13 @@ struct moveparams {
 	uch *high_buffer_start; int hcount;
 	uch *high_buffer_start; int hcount;
 };
 };
 
 
-void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+static void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 {
 {
 	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
 	high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
 #ifdef STANDARD_MEMORY_BIOS_CALL
 #ifdef STANDARD_MEMORY_BIOS_CALL
-	if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
+	if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory");
 #else
 #else
-	if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
+	if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory");
 #endif	
 #endif	
 	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
 	mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START;
 	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
 	low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
@@ -319,7 +319,7 @@ void setup_output_buffer_if_we_run_high(struct moveparams *mv)
 	mv->high_buffer_start = high_buffer_start;
 	mv->high_buffer_start = high_buffer_start;
 }
 }
 
 
-void close_output_buffer_if_we_run_high(struct moveparams *mv)
+static void close_output_buffer_if_we_run_high(struct moveparams *mv)
 {
 {
 	if (bytes_out > low_buffer_size) {
 	if (bytes_out > low_buffer_size) {
 		mv->lcount = low_buffer_size;
 		mv->lcount = low_buffer_size;
@@ -335,7 +335,7 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
 {
 {
 	real_mode = rmode;
 	real_mode = rmode;
 
 
-	if (SCREEN_INFO.orig_video_mode == 7) {
+	if (RM_SCREEN_INFO.orig_video_mode == 7) {
 		vidmem = (char *) 0xb0000;
 		vidmem = (char *) 0xb0000;
 		vidport = 0x3b4;
 		vidport = 0x3b4;
 	} else {
 	} else {
@@ -343,8 +343,8 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
 		vidport = 0x3d4;
 		vidport = 0x3d4;
 	}
 	}
 
 
-	lines = SCREEN_INFO.orig_video_lines;
-	cols = SCREEN_INFO.orig_video_cols;
+	lines = RM_SCREEN_INFO.orig_video_lines;
+	cols = RM_SCREEN_INFO.orig_video_cols;
 
 
 	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
 	if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
 	else setup_output_buffer_if_we_run_high(mv);
 	else setup_output_buffer_if_we_run_high(mv);

+ 2 - 4
arch/x86_64/boot/tools/build.c

@@ -149,10 +149,8 @@ int main(int argc, char ** argv)
 	sz = sb.st_size;
 	sz = sb.st_size;
 	fprintf (stderr, "System is %d kB\n", sz/1024);
 	fprintf (stderr, "System is %d kB\n", sz/1024);
 	sys_size = (sz + 15) / 16;
 	sys_size = (sz + 15) / 16;
-	/* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */
-	if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE))
-		die("System is too big. Try using %smodules.",
-			is_big_kernel ? "" : "bzImage or ");
+	if (!is_big_kernel && sys_size > DEF_SYSSIZE)
+		die("System is too big. Try using bzImage or modules.");
 	while (sz > 0) {
 	while (sz > 0) {
 		int l, n;
 		int l, n;
 
 

+ 134 - 25
arch/x86_64/defconfig

@@ -1,7 +1,7 @@
 #
 #
 # Automatically generated make config: don't edit
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-rc1-git11
-# Sun Apr 16 07:22:36 2006
+# Linux kernel version: 2.6.17-git6
+# Sat Jun 24 00:52:28 2006
 #
 #
 CONFIG_X86_64=y
 CONFIG_X86_64=y
 CONFIG_64BIT=y
 CONFIG_64BIT=y
@@ -42,7 +42,6 @@ CONFIG_IKCONFIG_PROC=y
 # CONFIG_RELAY is not set
 # CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_UID16=y
 CONFIG_UID16=y
-CONFIG_VM86=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
 # CONFIG_EMBEDDED is not set
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS=y
@@ -57,7 +56,6 @@ CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SHMEM=y
 CONFIG_SLAB=y
 CONFIG_SLAB=y
-CONFIG_DOUBLEFAULT=y
 # CONFIG_TINY_SHMEM is not set
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
 # CONFIG_SLOB is not set
@@ -144,7 +142,8 @@ CONFIG_NR_CPUS=32
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
 CONFIG_HPET_EMULATE_RTC=y
-CONFIG_GART_IOMMU=y
+CONFIG_IOMMU=y
+# CONFIG_CALGARY_IOMMU is not set
 CONFIG_SWIOTLB=y
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
 CONFIG_X86_MCE_INTEL=y
@@ -158,6 +157,7 @@ CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
 # CONFIG_HZ_1000 is not set
 CONFIG_HZ=250
 CONFIG_HZ=250
 # CONFIG_REORDER is not set
 # CONFIG_REORDER is not set
+CONFIG_K8_NB=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_ISA_DMA_API=y
 CONFIG_ISA_DMA_API=y
@@ -293,6 +293,8 @@ CONFIG_IP_PNP_DHCP=y
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 CONFIG_INET_DIAG=y
 CONFIG_INET_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 # CONFIG_TCP_CONG_ADVANCED is not set
@@ -305,7 +307,10 @@ CONFIG_IPV6=y
 # CONFIG_INET6_IPCOMP is not set
 # CONFIG_INET6_IPCOMP is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_XFRM_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
 # CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
 # CONFIG_IPV6_TUNNEL is not set
 # CONFIG_IPV6_TUNNEL is not set
+# CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
 # CONFIG_NETFILTER is not set
 
 
 #
 #
@@ -344,6 +349,7 @@ CONFIG_IPV6=y
 # Network testing
 # Network testing
 #
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_BT is not set
@@ -360,6 +366,7 @@ CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_FW_LOADER=y
 CONFIG_FW_LOADER=y
 # CONFIG_DEBUG_DRIVER is not set
 # CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
 
 
 #
 #
 # Connector - unified userspace <-> kernelspace linker
 # Connector - unified userspace <-> kernelspace linker
@@ -526,6 +533,7 @@ CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
 # CONFIG_SCSI_SATA_MV is not set
 CONFIG_SCSI_SATA_NV=y
 CONFIG_SCSI_SATA_NV=y
 # CONFIG_SCSI_PDC_ADMA is not set
 # CONFIG_SCSI_PDC_ADMA is not set
+# CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
 # CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_SX4 is not set
 # CONFIG_SCSI_SATA_SX4 is not set
@@ -591,10 +599,7 @@ CONFIG_IEEE1394=y
 #
 #
 # Device Drivers
 # Device Drivers
 #
 #
-
-#
-# Texas Instruments PCILynx requires I2C
-#
+# CONFIG_IEEE1394_PCILYNX is not set
 CONFIG_IEEE1394_OHCI1394=y
 CONFIG_IEEE1394_OHCI1394=y
 
 
 #
 #
@@ -645,7 +650,16 @@ CONFIG_VORTEX=y
 #
 #
 # Tulip family network device support
 # Tulip family network device support
 #
 #
-# CONFIG_NET_TULIP is not set
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
 # CONFIG_HP100 is not set
 # CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
 CONFIG_NET_PCI=y
 # CONFIG_PCNET32 is not set
 # CONFIG_PCNET32 is not set
@@ -697,6 +711,7 @@ CONFIG_TIGON3=y
 # CONFIG_IXGB is not set
 # CONFIG_IXGB is not set
 CONFIG_S2IO=m
 CONFIG_S2IO=m
 # CONFIG_S2IO_NAPI is not set
 # CONFIG_S2IO_NAPI is not set
+# CONFIG_MYRI10GE is not set
 
 
 #
 #
 # Token Ring devices
 # Token Ring devices
@@ -887,7 +902,56 @@ CONFIG_HPET_MMAP=y
 #
 #
 # I2C support
 # I2C support
 #
 #
-# CONFIG_I2C is not set
+CONFIG_I2C=m
+CONFIG_I2C_CHARDEV=m
+
+#
+# I2C Algorithms
+#
+# CONFIG_I2C_ALGOBIT is not set
+# CONFIG_I2C_ALGOPCF is not set
+# CONFIG_I2C_ALGOPCA is not set
+
+#
+# I2C Hardware Bus support
+#
+# CONFIG_I2C_ALI1535 is not set
+# CONFIG_I2C_ALI1563 is not set
+# CONFIG_I2C_ALI15X3 is not set
+# CONFIG_I2C_AMD756 is not set
+# CONFIG_I2C_AMD8111 is not set
+# CONFIG_I2C_I801 is not set
+# CONFIG_I2C_I810 is not set
+# CONFIG_I2C_PIIX4 is not set
+CONFIG_I2C_ISA=m
+# CONFIG_I2C_NFORCE2 is not set
+# CONFIG_I2C_OCORES is not set
+# CONFIG_I2C_PARPORT_LIGHT is not set
+# CONFIG_I2C_PROSAVAGE is not set
+# CONFIG_I2C_SAVAGE4 is not set
+# CONFIG_I2C_SIS5595 is not set
+# CONFIG_I2C_SIS630 is not set
+# CONFIG_I2C_SIS96X is not set
+# CONFIG_I2C_STUB is not set
+# CONFIG_I2C_VIA is not set
+# CONFIG_I2C_VIAPRO is not set
+# CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_ISA is not set
+
+#
+# Miscellaneous I2C Chip support
+#
+# CONFIG_SENSORS_DS1337 is not set
+# CONFIG_SENSORS_DS1374 is not set
+# CONFIG_SENSORS_EEPROM is not set
+# CONFIG_SENSORS_PCF8574 is not set
+# CONFIG_SENSORS_PCA9539 is not set
+# CONFIG_SENSORS_PCF8591 is not set
+# CONFIG_SENSORS_MAX6875 is not set
+# CONFIG_I2C_DEBUG_CORE is not set
+# CONFIG_I2C_DEBUG_ALGO is not set
+# CONFIG_I2C_DEBUG_BUS is not set
+# CONFIG_I2C_DEBUG_CHIP is not set
 
 
 #
 #
 # SPI support
 # SPI support
@@ -898,14 +962,51 @@ CONFIG_HPET_MMAP=y
 #
 #
 # Dallas's 1-wire bus
 # Dallas's 1-wire bus
 #
 #
-# CONFIG_W1 is not set
 
 
 #
 #
 # Hardware Monitoring support
 # Hardware Monitoring support
 #
 #
 CONFIG_HWMON=y
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_ABITUGURU is not set
+# CONFIG_SENSORS_ADM1021 is not set
+# CONFIG_SENSORS_ADM1025 is not set
+# CONFIG_SENSORS_ADM1026 is not set
+# CONFIG_SENSORS_ADM1031 is not set
+# CONFIG_SENSORS_ADM9240 is not set
+# CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATXP1 is not set
+# CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_F71805F is not set
 # CONFIG_SENSORS_F71805F is not set
+# CONFIG_SENSORS_FSCHER is not set
+# CONFIG_SENSORS_FSCPOS is not set
+# CONFIG_SENSORS_GL518SM is not set
+# CONFIG_SENSORS_GL520SM is not set
+# CONFIG_SENSORS_IT87 is not set
+# CONFIG_SENSORS_LM63 is not set
+# CONFIG_SENSORS_LM75 is not set
+# CONFIG_SENSORS_LM77 is not set
+# CONFIG_SENSORS_LM78 is not set
+# CONFIG_SENSORS_LM80 is not set
+# CONFIG_SENSORS_LM83 is not set
+# CONFIG_SENSORS_LM85 is not set
+# CONFIG_SENSORS_LM87 is not set
+# CONFIG_SENSORS_LM90 is not set
+# CONFIG_SENSORS_LM92 is not set
+# CONFIG_SENSORS_MAX1619 is not set
+# CONFIG_SENSORS_PC87360 is not set
+# CONFIG_SENSORS_SIS5595 is not set
+# CONFIG_SENSORS_SMSC47M1 is not set
+# CONFIG_SENSORS_SMSC47M192 is not set
+CONFIG_SENSORS_SMSC47B397=m
+# CONFIG_SENSORS_VIA686A is not set
+# CONFIG_SENSORS_VT8231 is not set
+# CONFIG_SENSORS_W83781D is not set
+# CONFIG_SENSORS_W83791D is not set
+# CONFIG_SENSORS_W83792D is not set
+# CONFIG_SENSORS_W83L785TS is not set
+# CONFIG_SENSORS_W83627HF is not set
+# CONFIG_SENSORS_W83627EHF is not set
 # CONFIG_SENSORS_HDAPS is not set
 # CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
 
@@ -918,6 +1019,7 @@ CONFIG_HWMON=y
 # Multimedia devices
 # Multimedia devices
 #
 #
 # CONFIG_VIDEO_DEV is not set
 # CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
 
 
 #
 #
 # Digital Video Broadcasting Devices
 # Digital Video Broadcasting Devices
@@ -953,28 +1055,17 @@ CONFIG_SOUND=y
 # Open Sound System
 # Open Sound System
 #
 #
 CONFIG_SOUND_PRIME=y
 CONFIG_SOUND_PRIME=y
-CONFIG_OBSOLETE_OSS_DRIVER=y
 # CONFIG_SOUND_BT878 is not set
 # CONFIG_SOUND_BT878 is not set
-# CONFIG_SOUND_CMPCI is not set
 # CONFIG_SOUND_EMU10K1 is not set
 # CONFIG_SOUND_EMU10K1 is not set
 # CONFIG_SOUND_FUSION is not set
 # CONFIG_SOUND_FUSION is not set
-# CONFIG_SOUND_CS4281 is not set
-# CONFIG_SOUND_ES1370 is not set
 # CONFIG_SOUND_ES1371 is not set
 # CONFIG_SOUND_ES1371 is not set
-# CONFIG_SOUND_ESSSOLO1 is not set
-# CONFIG_SOUND_MAESTRO is not set
-# CONFIG_SOUND_MAESTRO3 is not set
 CONFIG_SOUND_ICH=y
 CONFIG_SOUND_ICH=y
-# CONFIG_SOUND_SONICVIBES is not set
 # CONFIG_SOUND_TRIDENT is not set
 # CONFIG_SOUND_TRIDENT is not set
 # CONFIG_SOUND_MSNDCLAS is not set
 # CONFIG_SOUND_MSNDCLAS is not set
 # CONFIG_SOUND_MSNDPIN is not set
 # CONFIG_SOUND_MSNDPIN is not set
 # CONFIG_SOUND_VIA82CXXX is not set
 # CONFIG_SOUND_VIA82CXXX is not set
 # CONFIG_SOUND_OSS is not set
 # CONFIG_SOUND_OSS is not set
-# CONFIG_SOUND_ALI5455 is not set
-# CONFIG_SOUND_FORTE is not set
-# CONFIG_SOUND_RME96XX is not set
-# CONFIG_SOUND_AD1980 is not set
+# CONFIG_SOUND_TVMIXER is not set
 
 
 #
 #
 # USB support
 # USB support
@@ -1000,6 +1091,7 @@ CONFIG_USB_DEVICEFS=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_ISP116X_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD=y
 # CONFIG_USB_OHCI_BIG_ENDIAN is not set
 # CONFIG_USB_OHCI_BIG_ENDIAN is not set
@@ -1089,10 +1181,12 @@ CONFIG_USB_MON=y
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LED is not set
 # CONFIG_USB_LED is not set
+# CONFIG_USB_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TEST is not set
 # CONFIG_USB_TEST is not set
@@ -1140,6 +1234,19 @@ CONFIG_USB_MON=y
 #
 #
 # CONFIG_RTC_CLASS is not set
 # CONFIG_RTC_CLASS is not set
 
 
+#
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
+#
+
 #
 #
 # Firmware Drivers
 # Firmware Drivers
 #
 #
@@ -1175,6 +1282,7 @@ CONFIG_FS_POSIX_ACL=y
 # CONFIG_MINIX_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 # CONFIG_ROMFS_FS is not set
 CONFIG_INOTIFY=y
 CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
 CONFIG_AUTOFS_FS=y
@@ -1331,7 +1439,8 @@ CONFIG_DETECT_SOFTLOCKUP=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_FS=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
 # CONFIG_FRAME_POINTER is not set
-# CONFIG_UNWIND_INFO is not set
+CONFIG_UNWIND_INFO=y
+CONFIG_STACK_UNWIND=y
 # CONFIG_FORCED_INLINING is not set
 # CONFIG_FORCED_INLINING is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_DEBUG_RODATA is not set
 # CONFIG_DEBUG_RODATA is not set

+ 0 - 1
arch/x86_64/ia32/fpu32.c

@@ -2,7 +2,6 @@
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
  * FXSAVE<->i387 conversion support. Based on code by Gareth Hughes.
  * This is used for ptrace, signals and coredumps in 32bit emulation.
  * This is used for ptrace, signals and coredumps in 32bit emulation.
- * $Id: fpu32.c,v 1.1 2002/03/21 14:16:32 ak Exp $
  */ 
  */ 
 
 
 #include <linux/sched.h>
 #include <linux/sched.h>

+ 0 - 2
arch/x86_64/ia32/ia32_signal.c

@@ -6,8 +6,6 @@
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-12-*   x86-64 compatibility mode signal handling by Andi Kleen
  *  2000-12-*   x86-64 compatibility mode signal handling by Andi Kleen
- * 
- *  $Id: ia32_signal.c,v 1.22 2002/07/29 10:34:03 ak Exp $
  */
  */
 
 
 #include <linux/sched.h>
 #include <linux/sched.h>

+ 5 - 6
arch/x86_64/ia32/ia32entry.S

@@ -155,6 +155,7 @@ sysenter_tracesys:
 	.previous
 	.previous
 	jmp	sysenter_do_call
 	jmp	sysenter_do_call
 	CFI_ENDPROC
 	CFI_ENDPROC
+ENDPROC(ia32_sysenter_target)
 
 
 /*
 /*
  * 32bit SYSCALL instruction entry.
  * 32bit SYSCALL instruction entry.
@@ -178,7 +179,7 @@ sysenter_tracesys:
  */ 	
  */ 	
 ENTRY(ia32_cstar_target)
 ENTRY(ia32_cstar_target)
 	CFI_STARTPROC32	simple
 	CFI_STARTPROC32	simple
-	CFI_DEF_CFA	rsp,0
+	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
 	CFI_REGISTER	rip,rcx
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	/*CFI_REGISTER	rflags,r11*/
 	swapgs
 	swapgs
@@ -249,6 +250,7 @@ cstar_tracesys:
 	.quad 1b,ia32_badarg
 	.quad 1b,ia32_badarg
 	.previous
 	.previous
 	jmp cstar_do_call
 	jmp cstar_do_call
+END(ia32_cstar_target)
 				
 				
 ia32_badarg:
 ia32_badarg:
 	movq $-EFAULT,%rax
 	movq $-EFAULT,%rax
@@ -314,16 +316,13 @@ ia32_tracesys:
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	RESTORE_REST
 	jmp ia32_do_syscall
 	jmp ia32_do_syscall
+END(ia32_syscall)
 
 
 ia32_badsys:
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
 	jmp int_ret_from_sys_call
 	jmp int_ret_from_sys_call
 
 
-ni_syscall:
-	movq %rax,%rdi
-	jmp  sys32_ni_syscall			
-
 quiet_ni_syscall:
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
 	movq $-ENOSYS,%rax
 	ret
 	ret
@@ -370,10 +369,10 @@ ENTRY(ia32_ptregs_common)
 	RESTORE_REST
 	RESTORE_REST
 	jmp  ia32_sysret	/* misbalances the return cache */
 	jmp  ia32_sysret	/* misbalances the return cache */
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(ia32_ptregs_common)
 
 
 	.section .rodata,"a"
 	.section .rodata,"a"
 	.align 8
 	.align 8
-	.globl ia32_sys_call_table
 ia32_sys_call_table:
 ia32_sys_call_table:
 	.quad sys_restart_syscall
 	.quad sys_restart_syscall
 	.quad sys_exit
 	.quad sys_exit

+ 35 - 8
arch/x86_64/ia32/ptrace32.c

@@ -7,8 +7,6 @@
  * 
  * 
  * This allows to access 64bit processes too; but there is no way to see the extended 
  * This allows to access 64bit processes too; but there is no way to see the extended 
  * register contents.
  * register contents.
- *
- * $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
  */ 
  */ 
 
 
 #include <linux/kernel.h>
 #include <linux/kernel.h>
@@ -27,6 +25,7 @@
 #include <asm/debugreg.h>
 #include <asm/debugreg.h>
 #include <asm/i387.h>
 #include <asm/i387.h>
 #include <asm/fpu32.h>
 #include <asm/fpu32.h>
+#include <asm/ia32.h>
 
 
 /*
 /*
  * Determines which flags the user has access to [1 = access, 0 = no access].
  * Determines which flags the user has access to [1 = access, 0 = no access].
@@ -199,6 +198,24 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 
 #undef R32
 #undef R32
 
 
+static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+{
+	int ret;
+	compat_siginfo_t *si32 = (compat_siginfo_t *)compat_ptr(data);
+	siginfo_t *si = compat_alloc_user_space(sizeof(siginfo_t));
+	if (request == PTRACE_SETSIGINFO) {
+		ret = copy_siginfo_from_user32(si, si32);
+		if (ret)
+			return ret;
+	}
+	ret = sys_ptrace(request, pid, addr, (unsigned long)si);
+	if (ret)
+		return ret;
+	if (request == PTRACE_GETSIGINFO)
+		ret = copy_siginfo_to_user32(si32, si);
+	return ret;
+}
+
 asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 {
 {
 	struct task_struct *child;
 	struct task_struct *child;
@@ -208,9 +225,19 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	__u32 val;
 	__u32 val;
 
 
 	switch (request) { 
 	switch (request) { 
-	default:
+	case PTRACE_TRACEME:
+	case PTRACE_ATTACH:
+	case PTRACE_KILL:
+	case PTRACE_CONT:
+	case PTRACE_SINGLESTEP:
+	case PTRACE_DETACH:
+	case PTRACE_SYSCALL:
+	case PTRACE_SETOPTIONS:
 		return sys_ptrace(request, pid, addr, data); 
 		return sys_ptrace(request, pid, addr, data); 
 
 
+	default:
+		return -EINVAL;
+
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKTEXT:
 	case PTRACE_PEEKDATA:
 	case PTRACE_PEEKDATA:
 	case PTRACE_POKEDATA:
 	case PTRACE_POKEDATA:
@@ -225,10 +252,11 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_GETFPXREGS:
 	case PTRACE_GETFPXREGS:
 	case PTRACE_GETEVENTMSG:
 	case PTRACE_GETEVENTMSG:
 		break;
 		break;
-	} 
 
 
-	if (request == PTRACE_TRACEME)
-		return ptrace_traceme();
+	case PTRACE_SETSIGINFO:
+	case PTRACE_GETSIGINFO:
+		return ptrace32_siginfo(request, pid, addr, data);
+	}
 
 
 	child = ptrace_get_task_struct(pid);
 	child = ptrace_get_task_struct(pid);
 	if (IS_ERR(child))
 	if (IS_ERR(child))
@@ -349,8 +377,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 		break;
 		break;
 
 
 	default:
 	default:
-		ret = -EINVAL;
-		break;
+		BUG();
 	}
 	}
 
 
  out:
  out:

+ 1 - 24
arch/x86_64/ia32/sys_ia32.c

@@ -508,19 +508,6 @@ sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr, int options)
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
 	return compat_sys_wait4(pid, stat_addr, options, NULL);
 }
 }
 
 
-int sys32_ni_syscall(int call)
-{ 
-	struct task_struct *me = current;
-	static char lastcomm[sizeof(me->comm)];
-
-	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "IA32 syscall %d from %s not implemented\n",
-		       call, me->comm);
-		strncpy(lastcomm, me->comm, sizeof(lastcomm));
-	} 
-	return -ENOSYS;	       
-} 
-
 /* 32-bit timeval and related flotsam.  */
 /* 32-bit timeval and related flotsam.  */
 
 
 asmlinkage long
 asmlinkage long
@@ -916,7 +903,7 @@ long sys32_vm86_warning(void)
 	struct task_struct *me = current;
 	struct task_struct *me = current;
 	static char lastcomm[sizeof(me->comm)];
 	static char lastcomm[sizeof(me->comm)];
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
 	if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-		printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
+		compat_printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
 		       me->comm);
 		       me->comm);
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 		strncpy(lastcomm, me->comm, sizeof(lastcomm));
 	} 
 	} 
@@ -929,13 +916,3 @@ long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
 	return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
 	return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
 }
 }
 
 
-static int __init ia32_init (void)
-{
-	printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");  
-	return 0;
-}
-
-__initcall(ia32_init);
-
-extern unsigned long ia32_sys_call_table[];
-EXPORT_SYMBOL(ia32_sys_call_table);

+ 6 - 2
arch/x86_64/kernel/Makefile

@@ -8,7 +8,7 @@ obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
 		x8664_ksyms.o i387.o syscall.o vsyscall.o \
 		x8664_ksyms.o i387.o syscall.o vsyscall.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
 		setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
-		pci-dma.o pci-nommu.o
+		pci-dma.o pci-nommu.o alternative.o
 
 
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE)         += mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
@@ -28,11 +28,13 @@ obj-$(CONFIG_PM)		+= suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= suspend_asm.o
 obj-$(CONFIG_SOFTWARE_SUSPEND)	+= suspend_asm.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
-obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
+obj-$(CONFIG_IOMMU)		+= pci-gart.o aperture.o
+obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary.o tce.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 obj-$(CONFIG_X86_VSMP)		+= vsmp.o
 obj-$(CONFIG_X86_VSMP)		+= vsmp.o
+obj-$(CONFIG_K8_NB)		+= k8.o
 
 
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_MODULES)		+= module.o
 
 
@@ -49,3 +51,5 @@ intel_cacheinfo-y		+= ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y			+= ../../i386/kernel/quirks.o
 quirks-y			+= ../../i386/kernel/quirks.o
 i8237-y				+= ../../i386/kernel/i8237.o
 i8237-y				+= ../../i386/kernel/i8237.o
 msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
 msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
+alternative-y			+= ../../i386/kernel/alternative.o
+

+ 11 - 15
arch/x86_64/kernel/aperture.c

@@ -8,7 +8,6 @@
  * because only the bootmem allocator can allocate 32+MB. 
  * because only the bootmem allocator can allocate 32+MB. 
  * 
  * 
  * Copyright 2002 Andi Kleen, SuSE Labs.
  * Copyright 2002 Andi Kleen, SuSE Labs.
- * $Id: aperture.c,v 1.7 2003/08/01 03:36:18 ak Exp $
  */
  */
 #include <linux/config.h>
 #include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
@@ -24,6 +23,7 @@
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/pci-direct.h>
 #include <asm/pci-direct.h>
 #include <asm/dma.h>
 #include <asm/dma.h>
+#include <asm/k8.h>
 
 
 int iommu_aperture;
 int iommu_aperture;
 int iommu_aperture_disabled __initdata = 0;
 int iommu_aperture_disabled __initdata = 0;
@@ -37,8 +37,6 @@ int fix_aperture __initdata = 1;
 /* This code runs before the PCI subsystem is initialized, so just
 /* This code runs before the PCI subsystem is initialized, so just
    access the northbridge directly. */
    access the northbridge directly. */
 
 
-#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16))
-
 static u32 __init allocate_aperture(void) 
 static u32 __init allocate_aperture(void) 
 {
 {
 	pg_data_t *nd0 = NODE_DATA(0);
 	pg_data_t *nd0 = NODE_DATA(0);
@@ -68,20 +66,20 @@ static u32 __init allocate_aperture(void)
 	return (u32)__pa(p); 
 	return (u32)__pa(p); 
 }
 }
 
 
-static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) 
+static int __init aperture_valid(u64 aper_base, u32 aper_size)
 { 
 { 
 	if (!aper_base) 
 	if (!aper_base) 
 		return 0;
 		return 0;
 	if (aper_size < 64*1024*1024) { 
 	if (aper_size < 64*1024*1024) { 
-		printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); 
+		printk("Aperture too small (%d MB)\n", aper_size>>20);
 		return 0;
 		return 0;
 	}
 	}
 	if (aper_base + aper_size >= 0xffffffff) { 
 	if (aper_base + aper_size >= 0xffffffff) { 
-		printk("Aperture from %s beyond 4GB. Ignoring.\n",name);
+		printk("Aperture beyond 4GB. Ignoring.\n");
 		return 0; 
 		return 0; 
 	}
 	}
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
 	if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) {
-		printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name);
+		printk("Aperture pointing to e820 RAM. Ignoring.\n");
 		return 0; 
 		return 0; 
 	} 
 	} 
 	return 1;
 	return 1;
@@ -140,7 +138,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order)
 	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 
 	printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", 
 	       aper, 32 << *order, apsizereg);
 	       aper, 32 << *order, apsizereg);
 
 
-	if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order))
+	if (!aperture_valid(aper, (32*1024*1024) << *order))
 	    return 0;
 	    return 0;
 	return (u32)aper; 
 	return (u32)aper; 
 } 
 } 
@@ -208,10 +206,10 @@ void __init iommu_hole_init(void)
 
 
 	fix = 0;
 	fix = 0;
 	for (num = 24; num < 32; num++) {		
 	for (num = 24; num < 32; num++) {		
-		char name[30];
-		if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 
-			continue;	
+		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
+			continue;
 
 
+		iommu_detected = 1;
 		iommu_aperture = 1; 
 		iommu_aperture = 1; 
 
 
 		aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 
 		aper_order = (read_pci_config(0, num, 3, 0x90) >> 1) & 7; 
@@ -222,9 +220,7 @@ void __init iommu_hole_init(void)
 		printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 
 		printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, 
 		       aper_base, aper_size>>20);
 		       aper_base, aper_size>>20);
 		
 		
-		sprintf(name, "northbridge cpu %d", num-24); 
-
-		if (!aperture_valid(name, aper_base, aper_size)) { 
+		if (!aperture_valid(aper_base, aper_size)) {
 			fix = 1; 
 			fix = 1; 
 			break; 
 			break; 
 		}
 		}
@@ -273,7 +269,7 @@ void __init iommu_hole_init(void)
 
 
 	/* Fix up the north bridges */
 	/* Fix up the north bridges */
 	for (num = 24; num < 32; num++) { 		
 	for (num = 24; num < 32; num++) { 		
-		if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) 
+		if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00)))
 			continue;	
 			continue;	
 
 
 		/* Don't enable translation yet. That is done later. 
 		/* Don't enable translation yet. That is done later. 

+ 20 - 12
arch/x86_64/kernel/apic.c

@@ -100,7 +100,7 @@ void clear_local_APIC(void)
 	maxlvt = get_maxlvt();
 	maxlvt = get_maxlvt();
 
 
 	/*
 	/*
-	 * Masking an LVT entry on a P6 can trigger a local APIC error
+	 * Masking an LVT entry can trigger a local APIC error
 	 * if the vector is zero. Mask LVTERR first to prevent this.
 	 * if the vector is zero. Mask LVTERR first to prevent this.
 	 */
 	 */
 	if (maxlvt >= 3) {
 	if (maxlvt >= 3) {
@@ -851,7 +851,18 @@ void disable_APIC_timer(void)
 		unsigned long v;
 		unsigned long v;
 
 
 		v = apic_read(APIC_LVTT);
 		v = apic_read(APIC_LVTT);
-		apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+		/*
+		 * When an illegal vector value (0-15) is written to an LVT
+		 * entry and delivery mode is Fixed, the APIC may signal an
+		 * illegal vector error, with out regard to whether the mask
+		 * bit is set or whether an interrupt is actually seen on input.
+		 *
+		 * Boot sequence might call this function when the LVTT has
+		 * '0' vector value. So make sure vector field is set to
+		 * valid value.
+		 */
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, v);
 	}
 	}
 }
 }
 
 
@@ -909,15 +920,13 @@ int setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 	return -EINVAL;
 }
 }
 
 
-#ifdef CONFIG_X86_MCE_AMD
-void setup_threshold_lvt(unsigned long lvt_off)
+void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
+			    unsigned char msg_type, unsigned char mask)
 {
 {
-	unsigned int v = 0;
-	unsigned long reg = (lvt_off << 4) + 0x500;
-	v |= THRESHOLD_APIC_VECTOR;
+	unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+	unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 	apic_write(reg, v);
 	apic_write(reg, v);
 }
 }
-#endif /* CONFIG_X86_MCE_AMD */
 
 
 #undef APIC_DIVISOR
 #undef APIC_DIVISOR
 
 
@@ -983,7 +992,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
 }
 }
 
 
 /*
 /*
- * oem_force_hpet_timer -- force HPET mode for some boxes.
+ * apic_is_clustered_box() -- Check if we can expect good TSC
  *
  *
  * Thus far, the major user of this is IBM's Summit2 series:
  * Thus far, the major user of this is IBM's Summit2 series:
  *
  *
@@ -991,7 +1000,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
  * multi-chassis. Use available data to take a good guess.
  * multi-chassis. Use available data to take a good guess.
  * If in doubt, go HPET.
  * If in doubt, go HPET.
  */
  */
-__cpuinit int oem_force_hpet_timer(void)
+__cpuinit int apic_is_clustered_box(void)
 {
 {
 	int i, clusters, zeros;
 	int i, clusters, zeros;
 	unsigned id;
 	unsigned id;
@@ -1022,8 +1031,7 @@ __cpuinit int oem_force_hpet_timer(void)
 	}
 	}
 
 
 	/*
 	/*
-	 * If clusters > 2, then should be multi-chassis.  Return 1 for HPET.
-	 * Else return 0 to use TSC.
+	 * If clusters > 2, then should be multi-chassis.
 	 * May have to revisit this when multi-core + hyperthreaded CPUs come
 	 * May have to revisit this when multi-core + hyperthreaded CPUs come
 	 * out, but AFAIK this will work even for them.
 	 * out, but AFAIK this will work even for them.
 	 */
 	 */

+ 2 - 2
arch/x86_64/kernel/crash.c

@@ -111,14 +111,14 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu)
 	atomic_dec(&waiting_for_crash_ipi);
 	atomic_dec(&waiting_for_crash_ipi);
 	/* Assume hlt works */
 	/* Assume hlt works */
 	for(;;)
 	for(;;)
-		asm("hlt");
+		halt();
 
 
 	return 1;
 	return 1;
 }
 }
 
 
 static void smp_send_nmi_allbutself(void)
 static void smp_send_nmi_allbutself(void)
 {
 {
-	send_IPI_allbutself(APIC_DM_NMI);
+	send_IPI_allbutself(NMI_VECTOR);
 }
 }
 
 
 /*
 /*

+ 1 - 1
arch/x86_64/kernel/e820.c

@@ -1,7 +1,6 @@
 /* 
 /* 
  * Handle the memory map.
  * Handle the memory map.
  * The functions here do the job until bootmem takes over.
  * The functions here do the job until bootmem takes over.
- * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
  *
  *
  *  Getting sanitize_e820_map() in sync with i386 version by applying change:
  *  Getting sanitize_e820_map() in sync with i386 version by applying change:
  *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
  *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
@@ -621,6 +620,7 @@ void __init parse_memmapopt(char *p, char **from)
 }
 }
 
 
 unsigned long pci_mem_start = 0xaeedbabe;
 unsigned long pci_mem_start = 0xaeedbabe;
+EXPORT_SYMBOL(pci_mem_start);
 
 
 /*
 /*
  * Search for the biggest gap in the low 32 bits of the e820
  * Search for the biggest gap in the low 32 bits of the e820

+ 86 - 27
arch/x86_64/kernel/entry.S

@@ -154,6 +154,7 @@ rff_trace:
 	GET_THREAD_INFO(%rcx)	
 	GET_THREAD_INFO(%rcx)	
 	jmp rff_action
 	jmp rff_action
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(ret_from_fork)
 
 
 /*
 /*
  * System call entry. Upto 6 arguments in registers are supported.
  * System call entry. Upto 6 arguments in registers are supported.
@@ -188,7 +189,7 @@ rff_trace:
 
 
 ENTRY(system_call)
 ENTRY(system_call)
 	CFI_STARTPROC	simple
 	CFI_STARTPROC	simple
-	CFI_DEF_CFA	rsp,0
+	CFI_DEF_CFA	rsp,PDA_STACKOFFSET
 	CFI_REGISTER	rip,rcx
 	CFI_REGISTER	rip,rcx
 	/*CFI_REGISTER	rflags,r11*/
 	/*CFI_REGISTER	rflags,r11*/
 	swapgs
 	swapgs
@@ -285,6 +286,7 @@ tracesys:
 	/* Use IRET because user could have changed frame */
 	/* Use IRET because user could have changed frame */
 	jmp int_ret_from_sys_call
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(system_call)
 		
 		
 /* 
 /* 
  * Syscall return path ending with IRET.
  * Syscall return path ending with IRET.
@@ -364,6 +366,7 @@ int_restore_rest:
 	cli
 	cli
 	jmp int_with_check
 	jmp int_with_check
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(int_ret_from_sys_call)
 		
 		
 /* 
 /* 
  * Certain special system calls that need to save a complete full stack frame.
  * Certain special system calls that need to save a complete full stack frame.
@@ -375,6 +378,7 @@ int_restore_rest:
 	leaq	\func(%rip),%rax
 	leaq	\func(%rip),%rax
 	leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
 	leaq    -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
 	jmp	ptregscall_common
 	jmp	ptregscall_common
+END(\label)
 	.endm
 	.endm
 
 
 	CFI_STARTPROC
 	CFI_STARTPROC
@@ -404,6 +408,7 @@ ENTRY(ptregscall_common)
 	CFI_REL_OFFSET rip, 0
 	CFI_REL_OFFSET rip, 0
 	ret
 	ret
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(ptregscall_common)
 	
 	
 ENTRY(stub_execve)
 ENTRY(stub_execve)
 	CFI_STARTPROC
 	CFI_STARTPROC
@@ -418,6 +423,7 @@ ENTRY(stub_execve)
 	RESTORE_REST
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(stub_execve)
 	
 	
 /*
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * sigreturn is special because it needs to restore all registers on return.
@@ -435,6 +441,7 @@ ENTRY(stub_rt_sigreturn)
 	RESTORE_REST
 	RESTORE_REST
 	jmp int_ret_from_sys_call
 	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(stub_rt_sigreturn)
 
 
 /*
 /*
  * initial frame state for interrupts and exceptions
  * initial frame state for interrupts and exceptions
@@ -466,29 +473,18 @@ ENTRY(stub_rt_sigreturn)
 /* 0(%rsp): interrupt number */ 
 /* 0(%rsp): interrupt number */ 
 	.macro interrupt func
 	.macro interrupt func
 	cld
 	cld
-#ifdef CONFIG_DEBUG_INFO
-	SAVE_ALL	
-	movq %rsp,%rdi
-	/*
-	 * Setup a stack frame pointer.  This allows gdb to trace
-	 * back to the original stack.
-	 */
-	movq %rsp,%rbp
-	CFI_DEF_CFA_REGISTER	rbp
-#else		
 	SAVE_ARGS
 	SAVE_ARGS
 	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
 	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
-#endif	
+	pushq %rbp
+	CFI_ADJUST_CFA_OFFSET	8
+	CFI_REL_OFFSET		rbp, 0
+	movq %rsp,%rbp
+	CFI_DEF_CFA_REGISTER	rbp
 	testl $3,CS(%rdi)
 	testl $3,CS(%rdi)
 	je 1f
 	je 1f
 	swapgs	
 	swapgs	
 1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
 1:	incl	%gs:pda_irqcount	# RED-PEN should check preempt count
-	movq %gs:pda_irqstackptr,%rax
-	cmoveq %rax,%rsp /*todo This needs CFI annotation! */
-	pushq %rdi			# save old stack	
-#ifndef CONFIG_DEBUG_INFO
-	CFI_ADJUST_CFA_OFFSET	8
-#endif
+	cmoveq %gs:pda_irqstackptr,%rsp
 	call \func
 	call \func
 	.endm
 	.endm
 
 
@@ -497,17 +493,11 @@ ENTRY(common_interrupt)
 	interrupt do_IRQ
 	interrupt do_IRQ
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 	/* 0(%rsp): oldrsp-ARGOFFSET */
 ret_from_intr:
 ret_from_intr:
-	popq  %rdi
-#ifndef CONFIG_DEBUG_INFO
-	CFI_ADJUST_CFA_OFFSET	-8
-#endif
 	cli	
 	cli	
 	decl %gs:pda_irqcount
 	decl %gs:pda_irqcount
-#ifdef CONFIG_DEBUG_INFO
-	movq RBP(%rdi),%rbp
+	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
 	CFI_DEF_CFA_REGISTER	rsp
-#endif
-	leaq ARGOFFSET(%rdi),%rsp /*todo This needs CFI annotation! */
+	CFI_ADJUST_CFA_OFFSET	-8
 exit_intr:
 exit_intr:
 	GET_THREAD_INFO(%rcx)
 	GET_THREAD_INFO(%rcx)
 	testl $3,CS-ARGOFFSET(%rsp)
 	testl $3,CS-ARGOFFSET(%rsp)
@@ -589,7 +579,9 @@ retint_kernel:
 	call preempt_schedule_irq
 	call preempt_schedule_irq
 	jmp exit_intr
 	jmp exit_intr
 #endif	
 #endif	
+
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(common_interrupt)
 	
 	
 /*
 /*
  * APIC interrupts.
  * APIC interrupts.
@@ -605,17 +597,21 @@ retint_kernel:
 
 
 ENTRY(thermal_interrupt)
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+END(thermal_interrupt)
 
 
 ENTRY(threshold_interrupt)
 ENTRY(threshold_interrupt)
 	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
 	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+END(threshold_interrupt)
 
 
 #ifdef CONFIG_SMP	
 #ifdef CONFIG_SMP	
 ENTRY(reschedule_interrupt)
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
+END(reschedule_interrupt)
 
 
 	.macro INVALIDATE_ENTRY num
 	.macro INVALIDATE_ENTRY num
 ENTRY(invalidate_interrupt\num)
 ENTRY(invalidate_interrupt\num)
 	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt	
 	apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt	
+END(invalidate_interrupt\num)
 	.endm
 	.endm
 
 
 	INVALIDATE_ENTRY 0
 	INVALIDATE_ENTRY 0
@@ -629,17 +625,21 @@ ENTRY(invalidate_interrupt\num)
 
 
 ENTRY(call_function_interrupt)
 ENTRY(call_function_interrupt)
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
 	apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
+END(call_function_interrupt)
 #endif
 #endif
 
 
 #ifdef CONFIG_X86_LOCAL_APIC	
 #ifdef CONFIG_X86_LOCAL_APIC	
 ENTRY(apic_timer_interrupt)
 ENTRY(apic_timer_interrupt)
 	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
 	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
+END(apic_timer_interrupt)
 
 
 ENTRY(error_interrupt)
 ENTRY(error_interrupt)
 	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
 	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
+END(error_interrupt)
 
 
 ENTRY(spurious_interrupt)
 ENTRY(spurious_interrupt)
 	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
 	apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+END(spurious_interrupt)
 #endif
 #endif
 				
 				
 /*
 /*
@@ -777,6 +777,7 @@ error_kernelspace:
 	cmpq $gs_change,RIP(%rsp)
 	cmpq $gs_change,RIP(%rsp)
         je   error_swapgs
         je   error_swapgs
 	jmp  error_sti
 	jmp  error_sti
+END(error_entry)
 	
 	
        /* Reload gs selector with exception handling */
        /* Reload gs selector with exception handling */
        /* edi:  new selector */ 
        /* edi:  new selector */ 
@@ -794,6 +795,7 @@ gs_change:
 	CFI_ADJUST_CFA_OFFSET -8
 	CFI_ADJUST_CFA_OFFSET -8
         ret
         ret
 	CFI_ENDPROC
 	CFI_ENDPROC
+ENDPROC(load_gs_index)
        
        
         .section __ex_table,"a"
         .section __ex_table,"a"
         .align 8
         .align 8
@@ -847,7 +849,7 @@ ENTRY(kernel_thread)
 	UNFAKE_STACK_FRAME
 	UNFAKE_STACK_FRAME
 	ret
 	ret
 	CFI_ENDPROC
 	CFI_ENDPROC
-
+ENDPROC(kernel_thread)
 	
 	
 child_rip:
 child_rip:
 	/*
 	/*
@@ -860,6 +862,7 @@ child_rip:
 	# exit
 	# exit
 	xorl %edi, %edi
 	xorl %edi, %edi
 	call do_exit
 	call do_exit
+ENDPROC(child_rip)
 
 
 /*
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -889,19 +892,24 @@ ENTRY(execve)
 	UNFAKE_STACK_FRAME
 	UNFAKE_STACK_FRAME
 	ret
 	ret
 	CFI_ENDPROC
 	CFI_ENDPROC
+ENDPROC(execve)
 
 
 KPROBE_ENTRY(page_fault)
 KPROBE_ENTRY(page_fault)
 	errorentry do_page_fault
 	errorentry do_page_fault
+END(page_fault)
 	.previous .text
 	.previous .text
 
 
 ENTRY(coprocessor_error)
 ENTRY(coprocessor_error)
 	zeroentry do_coprocessor_error
 	zeroentry do_coprocessor_error
+END(coprocessor_error)
 
 
 ENTRY(simd_coprocessor_error)
 ENTRY(simd_coprocessor_error)
 	zeroentry do_simd_coprocessor_error	
 	zeroentry do_simd_coprocessor_error	
+END(simd_coprocessor_error)
 
 
 ENTRY(device_not_available)
 ENTRY(device_not_available)
 	zeroentry math_state_restore
 	zeroentry math_state_restore
+END(device_not_available)
 
 
 	/* runs on exception stack */
 	/* runs on exception stack */
 KPROBE_ENTRY(debug)
 KPROBE_ENTRY(debug)
@@ -911,6 +919,7 @@ KPROBE_ENTRY(debug)
 	paranoidentry do_debug, DEBUG_STACK
 	paranoidentry do_debug, DEBUG_STACK
 	jmp paranoid_exit
 	jmp paranoid_exit
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(debug)
 	.previous .text
 	.previous .text
 
 
 	/* runs on exception stack */	
 	/* runs on exception stack */	
@@ -961,6 +970,7 @@ paranoid_schedule:
 	cli
 	cli
 	jmp paranoid_userspace
 	jmp paranoid_userspace
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(nmi)
 	.previous .text
 	.previous .text
 
 
 KPROBE_ENTRY(int3)
 KPROBE_ENTRY(int3)
@@ -970,22 +980,28 @@ KPROBE_ENTRY(int3)
  	paranoidentry do_int3, DEBUG_STACK
  	paranoidentry do_int3, DEBUG_STACK
  	jmp paranoid_exit
  	jmp paranoid_exit
  	CFI_ENDPROC
  	CFI_ENDPROC
+END(int3)
 	.previous .text
 	.previous .text
 
 
 ENTRY(overflow)
 ENTRY(overflow)
 	zeroentry do_overflow
 	zeroentry do_overflow
+END(overflow)
 
 
 ENTRY(bounds)
 ENTRY(bounds)
 	zeroentry do_bounds
 	zeroentry do_bounds
+END(bounds)
 
 
 ENTRY(invalid_op)
 ENTRY(invalid_op)
 	zeroentry do_invalid_op	
 	zeroentry do_invalid_op	
+END(invalid_op)
 
 
 ENTRY(coprocessor_segment_overrun)
 ENTRY(coprocessor_segment_overrun)
 	zeroentry do_coprocessor_segment_overrun
 	zeroentry do_coprocessor_segment_overrun
+END(coprocessor_segment_overrun)
 
 
 ENTRY(reserved)
 ENTRY(reserved)
 	zeroentry do_reserved
 	zeroentry do_reserved
+END(reserved)
 
 
 	/* runs on exception stack */
 	/* runs on exception stack */
 ENTRY(double_fault)
 ENTRY(double_fault)
@@ -993,12 +1009,15 @@ ENTRY(double_fault)
 	paranoidentry do_double_fault
 	paranoidentry do_double_fault
 	jmp paranoid_exit
 	jmp paranoid_exit
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(double_fault)
 
 
 ENTRY(invalid_TSS)
 ENTRY(invalid_TSS)
 	errorentry do_invalid_TSS
 	errorentry do_invalid_TSS
+END(invalid_TSS)
 
 
 ENTRY(segment_not_present)
 ENTRY(segment_not_present)
 	errorentry do_segment_not_present
 	errorentry do_segment_not_present
+END(segment_not_present)
 
 
 	/* runs on exception stack */
 	/* runs on exception stack */
 ENTRY(stack_segment)
 ENTRY(stack_segment)
@@ -1006,19 +1025,24 @@ ENTRY(stack_segment)
 	paranoidentry do_stack_segment
 	paranoidentry do_stack_segment
 	jmp paranoid_exit
 	jmp paranoid_exit
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(stack_segment)
 
 
 KPROBE_ENTRY(general_protection)
 KPROBE_ENTRY(general_protection)
 	errorentry do_general_protection
 	errorentry do_general_protection
+END(general_protection)
 	.previous .text
 	.previous .text
 
 
 ENTRY(alignment_check)
 ENTRY(alignment_check)
 	errorentry do_alignment_check
 	errorentry do_alignment_check
+END(alignment_check)
 
 
 ENTRY(divide_error)
 ENTRY(divide_error)
 	zeroentry do_divide_error
 	zeroentry do_divide_error
+END(divide_error)
 
 
 ENTRY(spurious_interrupt_bug)
 ENTRY(spurious_interrupt_bug)
 	zeroentry do_spurious_interrupt_bug
 	zeroentry do_spurious_interrupt_bug
+END(spurious_interrupt_bug)
 
 
 #ifdef CONFIG_X86_MCE
 #ifdef CONFIG_X86_MCE
 	/* runs on exception stack */
 	/* runs on exception stack */
@@ -1029,6 +1053,7 @@ ENTRY(machine_check)
 	paranoidentry do_machine_check
 	paranoidentry do_machine_check
 	jmp paranoid_exit
 	jmp paranoid_exit
 	CFI_ENDPROC
 	CFI_ENDPROC
+END(machine_check)
 #endif
 #endif
 
 
 ENTRY(call_softirq)
 ENTRY(call_softirq)
@@ -1046,3 +1071,37 @@ ENTRY(call_softirq)
 	decl %gs:pda_irqcount
 	decl %gs:pda_irqcount
 	ret
 	ret
 	CFI_ENDPROC
 	CFI_ENDPROC
+ENDPROC(call_softirq)
+
+#ifdef CONFIG_STACK_UNWIND
+ENTRY(arch_unwind_init_running)
+	CFI_STARTPROC
+	movq	%r15, R15(%rdi)
+	movq	%r14, R14(%rdi)
+	xchgq	%rsi, %rdx
+	movq	%r13, R13(%rdi)
+	movq	%r12, R12(%rdi)
+	xorl	%eax, %eax
+	movq	%rbp, RBP(%rdi)
+	movq	%rbx, RBX(%rdi)
+	movq	(%rsp), %rcx
+	movq	%rax, R11(%rdi)
+	movq	%rax, R10(%rdi)
+	movq	%rax, R9(%rdi)
+	movq	%rax, R8(%rdi)
+	movq	%rax, RAX(%rdi)
+	movq	%rax, RCX(%rdi)
+	movq	%rax, RDX(%rdi)
+	movq	%rax, RSI(%rdi)
+	movq	%rax, RDI(%rdi)
+	movq	%rax, ORIG_RAX(%rdi)
+	movq	%rcx, RIP(%rdi)
+	leaq	8(%rsp), %rcx
+	movq	$__KERNEL_CS, CS(%rdi)
+	movq	%rax, EFLAGS(%rdi)
+	movq	%rcx, RSP(%rdi)
+	movq	$__KERNEL_DS, SS(%rdi)
+	jmpq	*%rdx
+	CFI_ENDPROC
+ENDPROC(arch_unwind_init_running)
+#endif

+ 17 - 13
arch/x86_64/kernel/genapic_flat.c

@@ -78,22 +78,29 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 
 
 static void flat_send_IPI_allbutself(int vector)
 static void flat_send_IPI_allbutself(int vector)
 {
 {
-#ifndef CONFIG_HOTPLUG_CPU
-	if (((num_online_cpus()) - 1) >= 1)
-		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+#ifdef	CONFIG_HOTPLUG_CPU
+	int hotplug = 1;
 #else
 #else
-	cpumask_t allbutme = cpu_online_map;
+	int hotplug = 0;
+#endif
+	if (hotplug || vector == NMI_VECTOR) {
+		cpumask_t allbutme = cpu_online_map;
 
 
-	cpu_clear(smp_processor_id(), allbutme);
+		cpu_clear(smp_processor_id(), allbutme);
 
 
-	if (!cpus_empty(allbutme))
-		flat_send_IPI_mask(allbutme, vector);
-#endif
+		if (!cpus_empty(allbutme))
+			flat_send_IPI_mask(allbutme, vector);
+	} else if (num_online_cpus() > 1) {
+		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
+	}
 }
 }
 
 
 static void flat_send_IPI_all(int vector)
 static void flat_send_IPI_all(int vector)
 {
 {
-	__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+	if (vector == NMI_VECTOR)
+		flat_send_IPI_mask(cpu_online_map, vector);
+	else
+		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 }
 
 
 static int flat_apic_id_registered(void)
 static int flat_apic_id_registered(void)
@@ -108,10 +115,7 @@ static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
 
 
 static unsigned int phys_pkg_id(int index_msb)
 static unsigned int phys_pkg_id(int index_msb)
 {
 {
-	u32 ebx;
-
-	ebx = cpuid_ebx(1);
-	return ((ebx >> 24) & 0xFF) >> index_msb;
+	return hard_smp_processor_id() >> index_msb;
 }
 }
 
 
 struct genapic apic_flat =  {
 struct genapic apic_flat =  {

+ 0 - 2
arch/x86_64/kernel/head64.c

@@ -2,8 +2,6 @@
  *  linux/arch/x86_64/kernel/head64.c -- prepare to run common code
  *  linux/arch/x86_64/kernel/head64.c -- prepare to run common code
  *
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
- *
- *  $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
  */
  */
 
 
 #include <linux/init.h>
 #include <linux/init.h>

+ 7 - 7
arch/x86_64/kernel/i8259.c

@@ -44,11 +44,11 @@
 	BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
 	BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
 	BI(x,c) BI(x,d) BI(x,e) BI(x,f)
 	BI(x,c) BI(x,d) BI(x,e) BI(x,f)
 
 
-#define BUILD_14_IRQS(x) \
+#define BUILD_15_IRQS(x) \
 	BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
 	BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
 	BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
 	BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
 	BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
 	BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
-	BI(x,c) BI(x,d)
+	BI(x,c) BI(x,d) BI(x,e)
 
 
 /*
 /*
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
  * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
@@ -73,13 +73,13 @@ BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
 BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
 BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
 
 
 #ifdef CONFIG_PCI_MSI
 #ifdef CONFIG_PCI_MSI
-	BUILD_14_IRQS(0xe)
+	BUILD_15_IRQS(0xe)
 #endif
 #endif
 
 
 #endif
 #endif
 
 
 #undef BUILD_16_IRQS
 #undef BUILD_16_IRQS
-#undef BUILD_14_IRQS
+#undef BUILD_15_IRQS
 #undef BI
 #undef BI
 
 
 
 
@@ -92,11 +92,11 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 	IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
 
 
-#define IRQLIST_14(x) \
+#define IRQLIST_15(x) \
 	IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
 	IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
 	IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
 	IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
 	IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
-	IRQ(x,c), IRQ(x,d)
+	IRQ(x,c), IRQ(x,d), IRQ(x,e)
 
 
 void (*interrupt[NR_IRQS])(void) = {
 void (*interrupt[NR_IRQS])(void) = {
 	IRQLIST_16(0x0),
 	IRQLIST_16(0x0),
@@ -108,7 +108,7 @@ void (*interrupt[NR_IRQS])(void) = {
 	IRQLIST_16(0xc), IRQLIST_16(0xd)
 	IRQLIST_16(0xc), IRQLIST_16(0xd)
 
 
 #ifdef CONFIG_PCI_MSI
 #ifdef CONFIG_PCI_MSI
-	, IRQLIST_14(0xe)
+	, IRQLIST_15(0xe)
 #endif
 #endif
 
 
 #endif
 #endif

+ 25 - 20
arch/x86_64/kernel/io_apic.c

@@ -41,6 +41,7 @@
 #include <asm/mach_apic.h>
 #include <asm/mach_apic.h>
 #include <asm/acpi.h>
 #include <asm/acpi.h>
 #include <asm/dma.h>
 #include <asm/dma.h>
+#include <asm/nmi.h>
 
 
 #define __apicdebuginit  __init
 #define __apicdebuginit  __init
 
 
@@ -56,6 +57,7 @@ int timer_over_8254 __initdata = 0;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 
 static DEFINE_SPINLOCK(ioapic_lock);
 static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_SPINLOCK(vector_lock);
 
 
 /*
 /*
  * # of IRQ routing registers
  * # of IRQ routing registers
@@ -317,7 +319,7 @@ void __init check_ioapic(void)
 				vendor &= 0xffff;
 				vendor &= 0xffff;
 				switch (vendor) { 
 				switch (vendor) { 
 				case PCI_VENDOR_ID_VIA:
 				case PCI_VENDOR_ID_VIA:
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
 					if ((end_pfn > MAX_DMA32_PFN ||
 					if ((end_pfn > MAX_DMA32_PFN ||
 					     force_iommu) &&
 					     force_iommu) &&
 					    !iommu_aperture_allowed) {
 					    !iommu_aperture_allowed) {
@@ -834,10 +836,17 @@ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
 int assign_irq_vector(int irq)
 int assign_irq_vector(int irq)
 {
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+	unsigned long flags;
+	int vector;
 
 
 	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
 	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
-	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
+
+	spin_lock_irqsave(&vector_lock, flags);
+
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+		spin_unlock_irqrestore(&vector_lock, flags);
 		return IO_APIC_VECTOR(irq);
 		return IO_APIC_VECTOR(irq);
+	}
 next:
 next:
 	current_vector += 8;
 	current_vector += 8;
 	if (current_vector == IA32_SYSCALL_VECTOR)
 	if (current_vector == IA32_SYSCALL_VECTOR)
@@ -849,11 +858,14 @@ next:
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}
 	}
 
 
-	vector_irq[current_vector] = irq;
+	vector = current_vector;
+	vector_irq[vector] = irq;
 	if (irq != AUTO_ASSIGN)
 	if (irq != AUTO_ASSIGN)
-		IO_APIC_VECTOR(irq) = current_vector;
+		IO_APIC_VECTOR(irq) = vector;
+
+	spin_unlock_irqrestore(&vector_lock, flags);
 
 
-	return current_vector;
+	return vector;
 }
 }
 
 
 extern void (*interrupt[NR_IRQS])(void);
 extern void (*interrupt[NR_IRQS])(void);
@@ -866,21 +878,14 @@ static struct hw_interrupt_type ioapic_edge_type;
 
 
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 static inline void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
 {
-	if (use_pci_vector() && !platform_legacy_irq(irq)) {
-		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-				trigger == IOAPIC_LEVEL)
-			irq_desc[vector].handler = &ioapic_level_type;
-		else
-			irq_desc[vector].handler = &ioapic_edge_type;
-		set_intr_gate(vector, interrupt[vector]);
-	} else	{
-		if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
-				trigger == IOAPIC_LEVEL)
-			irq_desc[irq].handler = &ioapic_level_type;
-		else
-			irq_desc[irq].handler = &ioapic_edge_type;
-		set_intr_gate(vector, interrupt[irq]);
-	}
+	unsigned idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+			trigger == IOAPIC_LEVEL)
+		irq_desc[idx].handler = &ioapic_level_type;
+	else
+		irq_desc[idx].handler = &ioapic_edge_type;
+	set_intr_gate(vector, interrupt[idx]);
 }
 }
 
 
 static void __init setup_IO_APIC_irqs(void)
 static void __init setup_IO_APIC_irqs(void)

+ 28 - 2
arch/x86_64/kernel/irq.c

@@ -26,6 +26,30 @@ atomic_t irq_mis_count;
 #endif
 #endif
 #endif
 #endif
 
 
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+/*
+ * Probabilistic stack overflow check:
+ *
+ * Only check the stack in process context, because everything else
+ * runs on the big interrupt stacks. Checking reliably is too expensive,
+ * so we just check from interrupts.
+ */
+static inline void stack_overflow_check(struct pt_regs *regs)
+{
+	u64 curbase = (u64) current->thread_info;
+	static unsigned long warned = -60*HZ;
+
+	if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
+	    regs->rsp <  curbase + sizeof(struct thread_info) + 128 &&
+	    time_after(jiffies, warned + 60*HZ)) {
+		printk("do_IRQ: %s near stack overflow (cur:%Lx,rsp:%lx)\n",
+		       current->comm, curbase, regs->rsp);
+		show_stack(NULL,NULL);
+		warned = jiffies;
+	}
+}
+#endif
+
 /*
 /*
  * Generic, controller-independent functions:
  * Generic, controller-independent functions:
  */
  */
@@ -39,7 +63,7 @@ int show_interrupts(struct seq_file *p, void *v)
 	if (i == 0) {
 	if (i == 0) {
 		seq_printf(p, "           ");
 		seq_printf(p, "           ");
 		for_each_online_cpu(j)
 		for_each_online_cpu(j)
-			seq_printf(p, "CPU%d       ",j);
+			seq_printf(p, "CPU%-8d",j);
 		seq_putc(p, '\n');
 		seq_putc(p, '\n');
 	}
 	}
 
 
@@ -96,7 +120,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 
 
 	exit_idle();
 	exit_idle();
 	irq_enter();
 	irq_enter();
-
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	stack_overflow_check(regs);
+#endif
 	__do_IRQ(irq, regs);
 	__do_IRQ(irq, regs);
 	irq_exit();
 	irq_exit();
 
 

+ 118 - 0
arch/x86_64/kernel/k8.c

@@ -0,0 +1,118 @@
+/*
+ * Shared support code for AMD K8 northbridges and derivates.
+ * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2.
+ */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/k8.h>
+
+int num_k8_northbridges;
+EXPORT_SYMBOL(num_k8_northbridges);
+
+static u32 *flush_words;
+
+struct pci_device_id k8_nb_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
+	{}
+};
+EXPORT_SYMBOL(k8_nb_ids);
+
+struct pci_dev **k8_northbridges;
+EXPORT_SYMBOL(k8_northbridges);
+
+static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+{
+	do {
+		dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
+		if (!dev)
+			break;
+	} while (!pci_match_id(&k8_nb_ids[0], dev));
+	return dev;
+}
+
+int cache_k8_northbridges(void)
+{
+	int i;
+	struct pci_dev *dev;
+	if (num_k8_northbridges)
+		return 0;
+
+	num_k8_northbridges = 0;
+	dev = NULL;
+	while ((dev = next_k8_northbridge(dev)) != NULL)
+		num_k8_northbridges++;
+
+	k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *),
+				  GFP_KERNEL);
+	if (!k8_northbridges)
+		return -ENOMEM;
+
+	flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL);
+	if (!flush_words) {
+		kfree(k8_northbridges);
+		return -ENOMEM;
+	}
+
+	dev = NULL;
+	i = 0;
+	while ((dev = next_k8_northbridge(dev)) != NULL) {
+		k8_northbridges[i++] = dev;
+		pci_read_config_dword(dev, 0x9c, &flush_words[i]);
+	}
+	k8_northbridges[i] = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+
+/* Ignores subdevice/subvendor but as far as I can figure out
+   they're useless anyways */
+int __init early_is_k8_nb(u32 device)
+{
+	struct pci_device_id *id;
+	u32 vendor = device & 0xffff;
+	device >>= 16;
+	for (id = k8_nb_ids; id->vendor; id++)
+		if (vendor == id->vendor && device == id->device)
+			return 1;
+	return 0;
+}
+
+void k8_flush_garts(void)
+{
+	int flushed, i;
+	unsigned long flags;
+	static DEFINE_SPINLOCK(gart_lock);
+
+	/* Avoid races between AGP and IOMMU. In theory it's not needed
+	   but I'm not sure if the hardware won't lose flush requests
+	   when another is pending. This whole thing is so expensive anyways
+	   that it doesn't matter to serialize more. -AK */
+	spin_lock_irqsave(&gart_lock, flags);
+	flushed = 0;
+	for (i = 0; i < num_k8_northbridges; i++) {
+		pci_write_config_dword(k8_northbridges[i], 0x9c,
+				       flush_words[i]|1);
+		flushed++;
+	}
+	for (i = 0; i < num_k8_northbridges; i++) {
+		u32 w;
+		/* Make sure the hardware actually executed the flush*/
+		for (;;) {
+			pci_read_config_dword(k8_northbridges[i],
+					      0x9c, &w);
+			if (!(w & 1))
+				break;
+			cpu_relax();
+		}
+	}
+	spin_unlock_irqrestore(&gart_lock, flags);
+	if (!flushed)
+		printk("nothing to flush?\n");
+}
+EXPORT_SYMBOL_GPL(k8_flush_garts);
+

+ 1 - 1
arch/x86_64/kernel/mce.c

@@ -562,7 +562,7 @@ static struct sysdev_class mce_sysclass = {
 	set_kset_name("machinecheck"),
 	set_kset_name("machinecheck"),
 };
 };
 
 
-static DEFINE_PER_CPU(struct sys_device, device_mce);
+DEFINE_PER_CPU(struct sys_device, device_mce);
 
 
 /* Why are there no generic functions for this? */
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
 #define ACCESSOR(name, var, start) \

+ 323 - 183
arch/x86_64/kernel/mce_amd.c

@@ -1,5 +1,5 @@
 /*
 /*
- *  (c) 2005 Advanced Micro Devices, Inc.
+ *  (c) 2005, 2006 Advanced Micro Devices, Inc.
  *  Your use of this code is subject to the terms and conditions of the
  *  Your use of this code is subject to the terms and conditions of the
  *  GNU general public license version 2. See "COPYING" or
  *  GNU general public license version 2. See "COPYING" or
  *  http://www.gnu.org/licenses/gpl.html
  *  http://www.gnu.org/licenses/gpl.html
@@ -8,9 +8,10 @@
  *
  *
  *  Support : jacob.shin@amd.com
  *  Support : jacob.shin@amd.com
  *
  *
- *  MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
- *  MC4_MISC0 exists per physical processor.
+ *  April 2006
+ *     - added support for AMD Family 0x10 processors
  *
  *
+ *  All MC4_MISCi registers are shared between multi-cores
  */
  */
 
 
 #include <linux/cpu.h>
 #include <linux/cpu.h>
@@ -29,32 +30,45 @@
 #include <asm/percpu.h>
 #include <asm/percpu.h>
 #include <asm/idle.h>
 #include <asm/idle.h>
 
 
-#define PFX "mce_threshold: "
-#define VERSION "version 1.00.9"
-#define NR_BANKS 5
-#define THRESHOLD_MAX 0xFFF
-#define INT_TYPE_APIC 0x00020000
-#define MASK_VALID_HI 0x80000000
-#define MASK_LVTOFF_HI 0x00F00000
-#define MASK_COUNT_EN_HI 0x00080000
-#define MASK_INT_TYPE_HI 0x00060000
-#define MASK_OVERFLOW_HI 0x00010000
+#define PFX               "mce_threshold: "
+#define VERSION           "version 1.1.1"
+#define NR_BANKS          6
+#define NR_BLOCKS         9
+#define THRESHOLD_MAX     0xFFF
+#define INT_TYPE_APIC     0x00020000
+#define MASK_VALID_HI     0x80000000
+#define MASK_LVTOFF_HI    0x00F00000
+#define MASK_COUNT_EN_HI  0x00080000
+#define MASK_INT_TYPE_HI  0x00060000
+#define MASK_OVERFLOW_HI  0x00010000
 #define MASK_ERR_COUNT_HI 0x00000FFF
 #define MASK_ERR_COUNT_HI 0x00000FFF
-#define MASK_OVERFLOW 0x0001000000000000L
+#define MASK_BLKPTR_LO    0xFF000000
+#define MCG_XBLK_ADDR     0xC0000400
 
 
-struct threshold_bank {
+struct threshold_block {
+	unsigned int block;
+	unsigned int bank;
 	unsigned int cpu;
 	unsigned int cpu;
-	u8 bank;
-	u8 interrupt_enable;
+	u32 address;
+	u16 interrupt_enable;
 	u16 threshold_limit;
 	u16 threshold_limit;
 	struct kobject kobj;
 	struct kobject kobj;
+	struct list_head miscj;
 };
 };
 
 
-static struct threshold_bank threshold_defaults = {
+/* defaults used early on boot */
+static struct threshold_block threshold_defaults = {
 	.interrupt_enable = 0,
 	.interrupt_enable = 0,
 	.threshold_limit = THRESHOLD_MAX,
 	.threshold_limit = THRESHOLD_MAX,
 };
 };
 
 
+struct threshold_bank {
+	struct kobject kobj;
+	struct threshold_block *blocks;
+	cpumask_t cpus;
+};
+static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
 static unsigned char shared_bank[NR_BANKS] = {
 	0, 0, 0, 0, 1
 	0, 0, 0, 0, 1
@@ -68,12 +82,12 @@ static DEFINE_PER_CPU(unsigned char, bank_map);	/* see which banks are on */
  */
  */
 
 
 /* must be called with correct cpu affinity */
 /* must be called with correct cpu affinity */
-static void threshold_restart_bank(struct threshold_bank *b,
+static void threshold_restart_bank(struct threshold_block *b,
 				   int reset, u16 old_limit)
 				   int reset, u16 old_limit)
 {
 {
 	u32 mci_misc_hi, mci_misc_lo;
 	u32 mci_misc_hi, mci_misc_lo;
 
 
-	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+	rdmsr(b->address, mci_misc_lo, mci_misc_hi);
 
 
 	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
 	if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
 		reset = 1;	/* limit cannot be lower than err count */
 		reset = 1;	/* limit cannot be lower than err count */
@@ -94,35 +108,57 @@ static void threshold_restart_bank(struct threshold_bank *b,
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
 
 
 	mci_misc_hi |= MASK_COUNT_EN_HI;
 	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi);
+	wrmsr(b->address, mci_misc_lo, mci_misc_hi);
 }
 }
 
 
+/* cpu init entry point, called from mce.c with preempt off */
 void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
 {
-	int bank;
-	u32 mci_misc_lo, mci_misc_hi;
+	unsigned int bank, block;
 	unsigned int cpu = smp_processor_id();
 	unsigned int cpu = smp_processor_id();
+	u32 low = 0, high = 0, address = 0;
 
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 	for (bank = 0; bank < NR_BANKS; ++bank) {
-		rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi);
+		for (block = 0; block < NR_BLOCKS; ++block) {
+			if (block == 0)
+				address = MSR_IA32_MC0_MISC + bank * 4;
+			else if (block == 1)
+				address = MCG_XBLK_ADDR
+					+ ((low & MASK_BLKPTR_LO) >> 21);
+			else
+				++address;
+
+			if (rdmsr_safe(address, &low, &high))
+				continue;
 
 
-		/* !valid, !counter present, bios locked */
-		if (!(mci_misc_hi & MASK_VALID_HI) ||
-		    !(mci_misc_hi & MASK_VALID_HI >> 1) ||
-		    (mci_misc_hi & MASK_VALID_HI >> 2))
-			continue;
+			if (!(high & MASK_VALID_HI)) {
+				if (block)
+					continue;
+				else
+					break;
+			}
 
 
-		per_cpu(bank_map, cpu) |= (1 << bank);
+			if (!(high & MASK_VALID_HI >> 1)  ||
+			     (high & MASK_VALID_HI >> 2))
+				continue;
 
 
+			if (!block)
+				per_cpu(bank_map, cpu) |= (1 << bank);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-		if (shared_bank[bank] && cpu_core_id[cpu])
-			continue;
+			if (shared_bank[bank] && c->cpu_core_id)
+				break;
 #endif
 #endif
+			high &= ~MASK_LVTOFF_HI;
+			high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
+			wrmsr(address, low, high);
 
 
-		setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20);
-		threshold_defaults.cpu = cpu;
-		threshold_defaults.bank = bank;
-		threshold_restart_bank(&threshold_defaults, 0, 0);
+			setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
+					       THRESHOLD_APIC_VECTOR,
+					       K8_APIC_EXT_INT_MSG_FIX, 0);
+
+			threshold_defaults.address = address;
+			threshold_restart_bank(&threshold_defaults, 0, 0);
+		}
 	}
 	}
 }
 }
 
 
@@ -137,8 +173,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
  */
  */
 asmlinkage void mce_threshold_interrupt(void)
 asmlinkage void mce_threshold_interrupt(void)
 {
 {
-	int bank;
+	unsigned int bank, block;
 	struct mce m;
 	struct mce m;
+	u32 low = 0, high = 0, address = 0;
 
 
 	ack_APIC_irq();
 	ack_APIC_irq();
 	exit_idle();
 	exit_idle();
@@ -150,15 +187,42 @@ asmlinkage void mce_threshold_interrupt(void)
 
 
 	/* assume first bank caused it */
 	/* assume first bank caused it */
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 	for (bank = 0; bank < NR_BANKS; ++bank) {
-		m.bank = MCE_THRESHOLD_BASE + bank;
-		rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc);
+		for (block = 0; block < NR_BLOCKS; ++block) {
+			if (block == 0)
+				address = MSR_IA32_MC0_MISC + bank * 4;
+			else if (block == 1)
+				address = MCG_XBLK_ADDR
+					+ ((low & MASK_BLKPTR_LO) >> 21);
+			else
+				++address;
+
+			if (rdmsr_safe(address, &low, &high))
+				continue;
 
 
-		if (m.misc & MASK_OVERFLOW) {
-			mce_log(&m);
-			goto out;
+			if (!(high & MASK_VALID_HI)) {
+				if (block)
+					continue;
+				else
+					break;
+			}
+
+			if (!(high & MASK_VALID_HI >> 1)  ||
+			     (high & MASK_VALID_HI >> 2))
+				continue;
+
+			if (high & MASK_OVERFLOW_HI) {
+				rdmsrl(address, m.misc);
+				rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
+				       m.status);
+				m.bank = K8_MCE_THRESHOLD_BASE
+				       + bank * NR_BLOCKS
+				       + block;
+				mce_log(&m);
+				goto out;
+			}
 		}
 		}
 	}
 	}
-      out:
+out:
 	irq_exit();
 	irq_exit();
 }
 }
 
 
@@ -166,20 +230,12 @@ asmlinkage void mce_threshold_interrupt(void)
  * Sysfs Interface
  * Sysfs Interface
  */
  */
 
 
-static struct sysdev_class threshold_sysclass = {
-	set_kset_name("threshold"),
-};
-
-static DEFINE_PER_CPU(struct sys_device, device_threshold);
-
 struct threshold_attr {
 struct threshold_attr {
-        struct attribute attr;
-        ssize_t(*show) (struct threshold_bank *, char *);
-        ssize_t(*store) (struct threshold_bank *, const char *, size_t count);
+	struct attribute attr;
+	ssize_t(*show) (struct threshold_block *, char *);
+	ssize_t(*store) (struct threshold_block *, const char *, size_t count);
 };
 };
 
 
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
-
 static cpumask_t affinity_set(unsigned int cpu)
 static cpumask_t affinity_set(unsigned int cpu)
 {
 {
 	cpumask_t oldmask = current->cpus_allowed;
 	cpumask_t oldmask = current->cpus_allowed;
@@ -194,15 +250,15 @@ static void affinity_restore(cpumask_t oldmask)
 	set_cpus_allowed(current, oldmask);
 	set_cpus_allowed(current, oldmask);
 }
 }
 
 
-#define SHOW_FIELDS(name) \
-        static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
-        { \
-                return sprintf(buf, "%lx\n", (unsigned long) b->name); \
-        }
+#define SHOW_FIELDS(name)                                           \
+static ssize_t show_ ## name(struct threshold_block * b, char *buf) \
+{                                                                   \
+        return sprintf(buf, "%lx\n", (unsigned long) b->name);      \
+}
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(interrupt_enable)
 SHOW_FIELDS(threshold_limit)
 SHOW_FIELDS(threshold_limit)
 
 
-static ssize_t store_interrupt_enable(struct threshold_bank *b,
+static ssize_t store_interrupt_enable(struct threshold_block *b,
 				      const char *buf, size_t count)
 				      const char *buf, size_t count)
 {
 {
 	char *end;
 	char *end;
@@ -219,7 +275,7 @@ static ssize_t store_interrupt_enable(struct threshold_bank *b,
 	return end - buf;
 	return end - buf;
 }
 }
 
 
-static ssize_t store_threshold_limit(struct threshold_bank *b,
+static ssize_t store_threshold_limit(struct threshold_block *b,
 				     const char *buf, size_t count)
 				     const char *buf, size_t count)
 {
 {
 	char *end;
 	char *end;
@@ -242,18 +298,18 @@ static ssize_t store_threshold_limit(struct threshold_bank *b,
 	return end - buf;
 	return end - buf;
 }
 }
 
 
-static ssize_t show_error_count(struct threshold_bank *b, char *buf)
+static ssize_t show_error_count(struct threshold_block *b, char *buf)
 {
 {
 	u32 high, low;
 	u32 high, low;
 	cpumask_t oldmask;
 	cpumask_t oldmask;
 	oldmask = affinity_set(b->cpu);
 	oldmask = affinity_set(b->cpu);
-	rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */
+	rdmsr(b->address, low, high);
 	affinity_restore(oldmask);
 	affinity_restore(oldmask);
 	return sprintf(buf, "%x\n",
 	return sprintf(buf, "%x\n",
 		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
 		       (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit));
 }
 }
 
 
-static ssize_t store_error_count(struct threshold_bank *b,
+static ssize_t store_error_count(struct threshold_block *b,
 				 const char *buf, size_t count)
 				 const char *buf, size_t count)
 {
 {
 	cpumask_t oldmask;
 	cpumask_t oldmask;
@@ -269,13 +325,13 @@ static ssize_t store_error_count(struct threshold_bank *b,
         .store = _store,                                      \
         .store = _store,                                      \
 };
 };
 
 
-#define ATTR_FIELDS(name) \
-        static struct threshold_attr name = \
+#define RW_ATTR(name)                                           \
+static struct threshold_attr name =                             \
         THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
         THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
 
 
-ATTR_FIELDS(interrupt_enable);
-ATTR_FIELDS(threshold_limit);
-ATTR_FIELDS(error_count);
+RW_ATTR(interrupt_enable);
+RW_ATTR(threshold_limit);
+RW_ATTR(error_count);
 
 
 static struct attribute *default_attrs[] = {
 static struct attribute *default_attrs[] = {
 	&interrupt_enable.attr,
 	&interrupt_enable.attr,
@@ -284,12 +340,12 @@ static struct attribute *default_attrs[] = {
 	NULL
 	NULL
 };
 };
 
 
-#define to_bank(k) container_of(k,struct threshold_bank,kobj)
-#define to_attr(a) container_of(a,struct threshold_attr,attr)
+#define to_block(k) container_of(k, struct threshold_block, kobj)
+#define to_attr(a) container_of(a, struct threshold_attr, attr)
 
 
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 {
 {
-	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
 	ssize_t ret;
 	ret = a->show ? a->show(b, buf) : -EIO;
 	ret = a->show ? a->show(b, buf) : -EIO;
@@ -299,7 +355,7 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
 static ssize_t store(struct kobject *kobj, struct attribute *attr,
 static ssize_t store(struct kobject *kobj, struct attribute *attr,
 		     const char *buf, size_t count)
 		     const char *buf, size_t count)
 {
 {
-	struct threshold_bank *b = to_bank(kobj);
+	struct threshold_block *b = to_block(kobj);
 	struct threshold_attr *a = to_attr(attr);
 	struct threshold_attr *a = to_attr(attr);
 	ssize_t ret;
 	ssize_t ret;
 	ret = a->store ? a->store(b, buf, count) : -EIO;
 	ret = a->store ? a->store(b, buf, count) : -EIO;
@@ -316,69 +372,174 @@ static struct kobj_type threshold_ktype = {
 	.default_attrs = default_attrs,
 	.default_attrs = default_attrs,
 };
 };
 
 
+static __cpuinit int allocate_threshold_blocks(unsigned int cpu,
+					       unsigned int bank,
+					       unsigned int block,
+					       u32 address)
+{
+	int err;
+	u32 low, high;
+	struct threshold_block *b = NULL;
+
+	if ((bank >= NR_BANKS) || (block >= NR_BLOCKS))
+		return 0;
+
+	if (rdmsr_safe(address, &low, &high))
+		goto recurse;
+
+	if (!(high & MASK_VALID_HI)) {
+		if (block)
+			goto recurse;
+		else
+			return 0;
+	}
+
+	if (!(high & MASK_VALID_HI >> 1)  ||
+	     (high & MASK_VALID_HI >> 2))
+		goto recurse;
+
+	b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
+	if (!b)
+		return -ENOMEM;
+	memset(b, 0, sizeof(struct threshold_block));
+
+	b->block = block;
+	b->bank = bank;
+	b->cpu = cpu;
+	b->address = address;
+	b->interrupt_enable = 0;
+	b->threshold_limit = THRESHOLD_MAX;
+
+	INIT_LIST_HEAD(&b->miscj);
+
+	if (per_cpu(threshold_banks, cpu)[bank]->blocks)
+		list_add(&b->miscj,
+			 &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
+	else
+		per_cpu(threshold_banks, cpu)[bank]->blocks = b;
+
+	kobject_set_name(&b->kobj, "misc%i", block);
+	b->kobj.parent = &per_cpu(threshold_banks, cpu)[bank]->kobj;
+	b->kobj.ktype = &threshold_ktype;
+	err = kobject_register(&b->kobj);
+	if (err)
+		goto out_free;
+recurse:
+	if (!block) {
+		address = (low & MASK_BLKPTR_LO) >> 21;
+		if (!address)
+			return 0;
+		address += MCG_XBLK_ADDR;
+	} else
+		++address;
+
+	err = allocate_threshold_blocks(cpu, bank, ++block, address);
+	if (err)
+		goto out_free;
+
+	return err;
+
+out_free:
+	if (b) {
+		kobject_unregister(&b->kobj);
+		kfree(b);
+	}
+	return err;
+}
+
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
 /* symlinks sibling shared banks to first core.  first core owns dir/files. */
-static __cpuinit int threshold_create_bank(unsigned int cpu, int bank)
+static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 {
 {
-	int err = 0;
+	int i, err = 0;
 	struct threshold_bank *b = NULL;
 	struct threshold_bank *b = NULL;
+	cpumask_t oldmask = CPU_MASK_NONE;
+	char name[32];
+
+	sprintf(name, "threshold_bank%i", bank);
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	if (cpu_core_id[cpu] && shared_bank[bank]) {	/* symlink */
-		char name[16];
-		unsigned lcpu = first_cpu(cpu_core_map[cpu]);
-		if (cpu_core_id[lcpu])
-			goto out;	/* first core not up yet */
+	if (cpu_data[cpu].cpu_core_id && shared_bank[bank]) {	/* symlink */
+		i = first_cpu(cpu_core_map[cpu]);
+
+		/* first core not up yet */
+		if (cpu_data[i].cpu_core_id)
+			goto out;
+
+		/* already linked */
+		if (per_cpu(threshold_banks, cpu)[bank])
+			goto out;
+
+		b = per_cpu(threshold_banks, i)[bank];
 
 
-		b = per_cpu(threshold_banks, lcpu)[bank];
 		if (!b)
 		if (!b)
 			goto out;
 			goto out;
-		sprintf(name, "bank%i", bank);
-		err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj,
+
+		err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj,
 					&b->kobj, name);
 					&b->kobj, name);
 		if (err)
 		if (err)
 			goto out;
 			goto out;
+
+		b->cpus = cpu_core_map[cpu];
 		per_cpu(threshold_banks, cpu)[bank] = b;
 		per_cpu(threshold_banks, cpu)[bank] = b;
 		goto out;
 		goto out;
 	}
 	}
 #endif
 #endif
 
 
-	b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL);
+	b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
 	if (!b) {
 	if (!b) {
 		err = -ENOMEM;
 		err = -ENOMEM;
 		goto out;
 		goto out;
 	}
 	}
 	memset(b, 0, sizeof(struct threshold_bank));
 	memset(b, 0, sizeof(struct threshold_bank));
 
 
-	b->cpu = cpu;
-	b->bank = bank;
-	b->interrupt_enable = 0;
-	b->threshold_limit = THRESHOLD_MAX;
-	kobject_set_name(&b->kobj, "bank%i", bank);
-	b->kobj.parent = &per_cpu(device_threshold, cpu).kobj;
-	b->kobj.ktype = &threshold_ktype;
-
+	kobject_set_name(&b->kobj, "threshold_bank%i", bank);
+	b->kobj.parent = &per_cpu(device_mce, cpu).kobj;
+#ifndef CONFIG_SMP
+	b->cpus = CPU_MASK_ALL;
+#else
+	b->cpus = cpu_core_map[cpu];
+#endif
 	err = kobject_register(&b->kobj);
 	err = kobject_register(&b->kobj);
-	if (err) {
-		kfree(b);
-		goto out;
-	}
+	if (err)
+		goto out_free;
+
 	per_cpu(threshold_banks, cpu)[bank] = b;
 	per_cpu(threshold_banks, cpu)[bank] = b;
-      out:
+
+	oldmask = affinity_set(cpu);
+	err = allocate_threshold_blocks(cpu, bank, 0,
+					MSR_IA32_MC0_MISC + bank * 4);
+	affinity_restore(oldmask);
+
+	if (err)
+		goto out_free;
+
+	for_each_cpu_mask(i, b->cpus) {
+		if (i == cpu)
+			continue;
+
+		err = sysfs_create_link(&per_cpu(device_mce, i).kobj,
+					&b->kobj, name);
+		if (err)
+			goto out;
+
+		per_cpu(threshold_banks, i)[bank] = b;
+	}
+
+	goto out;
+
+out_free:
+	per_cpu(threshold_banks, cpu)[bank] = NULL;
+	kfree(b);
+out:
 	return err;
 	return err;
 }
 }
 
 
 /* create dir/files for all valid threshold banks */
 /* create dir/files for all valid threshold banks */
 static __cpuinit int threshold_create_device(unsigned int cpu)
 static __cpuinit int threshold_create_device(unsigned int cpu)
 {
 {
-	int bank;
+	unsigned int bank;
 	int err = 0;
 	int err = 0;
 
 
-	per_cpu(device_threshold, cpu).id = cpu;
-	per_cpu(device_threshold, cpu).cls = &threshold_sysclass;
-	err = sysdev_register(&per_cpu(device_threshold, cpu));
-	if (err)
-		goto out;
-
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & 1 << bank))
 		if (!(per_cpu(bank_map, cpu) & 1 << bank))
 			continue;
 			continue;
@@ -386,7 +547,7 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 		if (err)
 		if (err)
 			goto out;
 			goto out;
 	}
 	}
-      out:
+out:
 	return err;
 	return err;
 }
 }
 
 
@@ -397,92 +558,85 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
  *   of shared sysfs dir/files, and rest of the cores will be symlinked to it.
  *   of shared sysfs dir/files, and rest of the cores will be symlinked to it.
  */
  */
 
 
-/* cpu hotplug call removes all symlinks before first core dies */
+static __cpuinit void deallocate_threshold_block(unsigned int cpu,
+						 unsigned int bank)
+{
+	struct threshold_block *pos = NULL;
+	struct threshold_block *tmp = NULL;
+	struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];
+
+	if (!head)
+		return;
+
+	list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
+		kobject_unregister(&pos->kobj);
+		list_del(&pos->miscj);
+		kfree(pos);
+	}
+
+	kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
+	per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
+}
+
 static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
 static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank)
 {
 {
+	int i = 0;
 	struct threshold_bank *b;
 	struct threshold_bank *b;
-	char name[16];
+	char name[32];
 
 
 	b = per_cpu(threshold_banks, cpu)[bank];
 	b = per_cpu(threshold_banks, cpu)[bank];
+
 	if (!b)
 	if (!b)
 		return;
 		return;
-	if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) {
-		sprintf(name, "bank%i", bank);
-		sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name);
-		per_cpu(threshold_banks, cpu)[bank] = NULL;
-	} else {
-		kobject_unregister(&b->kobj);
-		kfree(per_cpu(threshold_banks, cpu)[bank]);
+
+	if (!b->blocks)
+		goto free_out;
+
+	sprintf(name, "threshold_bank%i", bank);
+
+	/* sibling symlink */
+	if (shared_bank[bank] && b->blocks->cpu != cpu) {
+		sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name);
+		per_cpu(threshold_banks, i)[bank] = NULL;
+		return;
+	}
+
+	/* remove all sibling symlinks before unregistering */
+	for_each_cpu_mask(i, b->cpus) {
+		if (i == cpu)
+			continue;
+
+		sysfs_remove_link(&per_cpu(device_mce, i).kobj, name);
+		per_cpu(threshold_banks, i)[bank] = NULL;
 	}
 	}
+
+	deallocate_threshold_block(cpu, bank);
+
+free_out:
+	kobject_unregister(&b->kobj);
+	kfree(b);
+	per_cpu(threshold_banks, cpu)[bank] = NULL;
 }
 }
 
 
 static __cpuinit void threshold_remove_device(unsigned int cpu)
 static __cpuinit void threshold_remove_device(unsigned int cpu)
 {
 {
-	int bank;
+	unsigned int bank;
 
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & 1 << bank))
 		if (!(per_cpu(bank_map, cpu) & 1 << bank))
 			continue;
 			continue;
 		threshold_remove_bank(cpu, bank);
 		threshold_remove_bank(cpu, bank);
 	}
 	}
-	sysdev_unregister(&per_cpu(device_threshold, cpu));
 }
 }
 
 
-/* link all existing siblings when first core comes up */
-static __cpuinit int threshold_create_symlinks(unsigned int cpu)
-{
-	int bank, err = 0;
-	unsigned int lcpu = 0;
-
-	if (cpu_core_id[cpu])
-		return 0;
-	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
-		if (lcpu == cpu)
-			continue;
-		for (bank = 0; bank < NR_BANKS; ++bank) {
-			if (!(per_cpu(bank_map, cpu) & 1 << bank))
-				continue;
-			if (!shared_bank[bank])
-				continue;
-			err = threshold_create_bank(lcpu, bank);
-		}
-	}
-	return err;
-}
-
-/* remove all symlinks before first core dies. */
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
-	int bank;
-	unsigned int lcpu = 0;
-	if (cpu_core_id[cpu])
-		return;
-	for_each_cpu_mask(lcpu, cpu_core_map[cpu]) {
-		if (lcpu == cpu)
-			continue;
-		for (bank = 0; bank < NR_BANKS; ++bank) {
-			if (!(per_cpu(bank_map, cpu) & 1 << bank))
-				continue;
-			if (!shared_bank[bank])
-				continue;
-			threshold_remove_bank(lcpu, bank);
-		}
-	}
-}
 #else /* !CONFIG_HOTPLUG_CPU */
 #else /* !CONFIG_HOTPLUG_CPU */
-static __cpuinit void threshold_create_symlinks(unsigned int cpu)
-{
-}
-static __cpuinit void threshold_remove_symlinks(unsigned int cpu)
-{
-}
 static void threshold_remove_device(unsigned int cpu)
 static void threshold_remove_device(unsigned int cpu)
 {
 {
 }
 }
 #endif
 #endif
 
 
 /* get notified when a cpu comes on/off */
 /* get notified when a cpu comes on/off */
-static int threshold_cpu_callback(struct notifier_block *nfb,
+static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb,
 					    unsigned long action, void *hcpu)
 					    unsigned long action, void *hcpu)
 {
 {
 	/* cpu was unsigned int to begin with */
 	/* cpu was unsigned int to begin with */
@@ -494,13 +648,6 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
 	switch (action) {
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE:
 		threshold_create_device(cpu);
 		threshold_create_device(cpu);
-		threshold_create_symlinks(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		threshold_remove_symlinks(cpu);
-		break;
-	case CPU_DOWN_FAILED:
-		threshold_create_symlinks(cpu);
 		break;
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD:
 		threshold_remove_device(cpu);
 		threshold_remove_device(cpu);
@@ -512,29 +659,22 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 	return NOTIFY_OK;
 }
 }
 
 
-static struct notifier_block threshold_cpu_notifier = {
+static struct notifier_block threshold_cpu_notifier __cpuinitdata = {
 	.notifier_call = threshold_cpu_callback,
 	.notifier_call = threshold_cpu_callback,
 };
 };
 
 
 static __init int threshold_init_device(void)
 static __init int threshold_init_device(void)
 {
 {
-	int err;
-	int lcpu = 0;
-
-	err = sysdev_class_register(&threshold_sysclass);
-	if (err)
-		goto out;
+	unsigned lcpu = 0;
 
 
 	/* to hit CPUs online before the notifier is up */
 	/* to hit CPUs online before the notifier is up */
 	for_each_online_cpu(lcpu) {
 	for_each_online_cpu(lcpu) {
-		err = threshold_create_device(lcpu);
+		int err = threshold_create_device(lcpu);
 		if (err)
 		if (err)
-			goto out;
+			return err;
 	}
 	}
 	register_cpu_notifier(&threshold_cpu_notifier);
 	register_cpu_notifier(&threshold_cpu_notifier);
-
-      out:
-	return err;
+	return 0;
 }
 }
 
 
 device_initcall(threshold_init_device);
 device_initcall(threshold_init_device);

+ 25 - 13
arch/x86_64/kernel/module.c

@@ -145,26 +145,38 @@ int apply_relocate(Elf_Shdr *sechdrs,
 	return -ENOSYS;
 	return -ENOSYS;
 } 
 } 
 
 
-extern void apply_alternatives(void *start, void *end); 
-
 int module_finalize(const Elf_Ehdr *hdr,
 int module_finalize(const Elf_Ehdr *hdr,
-		    const Elf_Shdr *sechdrs,
-		    struct module *me)
+                    const Elf_Shdr *sechdrs,
+                    struct module *me)
 {
 {
-	const Elf_Shdr *s;
+	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 
-	/* look for .altinstructions to patch */ 
-	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 
-		void *seg; 		
-		if (strcmp(".altinstructions", secstrings + s->sh_name))
-			continue;
-		seg = (void *)s->sh_addr; 
-		apply_alternatives(seg, seg + s->sh_size); 
-	} 	
+	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
+		if (!strcmp(".text", secstrings + s->sh_name))
+			text = s;
+		if (!strcmp(".altinstructions", secstrings + s->sh_name))
+			alt = s;
+		if (!strcmp(".smp_locks", secstrings + s->sh_name))
+			locks= s;
+	}
+
+	if (alt) {
+		/* patch .altinstructions */
+		void *aseg = (void *)alt->sh_addr;
+		apply_alternatives(aseg, aseg + alt->sh_size);
+	}
+	if (locks && text) {
+		void *lseg = (void *)locks->sh_addr;
+		void *tseg = (void *)text->sh_addr;
+		alternatives_smp_module_add(me, me->name,
+					    lseg, lseg + locks->sh_size,
+					    tseg, tseg + text->sh_size);
+	}
 	return 0;
 	return 0;
 }
 }
 
 
 void module_arch_cleanup(struct module *mod)
 void module_arch_cleanup(struct module *mod)
 {
 {
+	alternatives_smp_module_del(mod);
 }
 }

+ 76 - 13
arch/x86_64/kernel/nmi.c

@@ -15,11 +15,7 @@
 #include <linux/config.h>
 #include <linux/config.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/interrupt.h>
-#include <linux/mc146818rtc.h>
-#include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/sysdev.h>
 #include <linux/sysdev.h>
 #include <linux/nmi.h>
 #include <linux/nmi.h>
@@ -27,14 +23,11 @@
 #include <linux/kprobes.h>
 #include <linux/kprobes.h>
 
 
 #include <asm/smp.h>
 #include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
 #include <asm/nmi.h>
 #include <asm/nmi.h>
-#include <asm/msr.h>
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
-#include <asm/local.h>
 #include <asm/mce.h>
 #include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 
 
 /*
 /*
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
  * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
@@ -74,6 +67,9 @@ static unsigned int nmi_p4_cccr_val;
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
 
 
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 #define MSR_P4_MISC_ENABLE	0x1A0
 #define MSR_P4_MISC_ENABLE	0x1A0
 #define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
 #define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
@@ -105,7 +101,10 @@ static __cpuinit inline int nmi_known_cpu(void)
 	case X86_VENDOR_AMD:
 	case X86_VENDOR_AMD:
 		return boot_cpu_data.x86 == 15;
 		return boot_cpu_data.x86 == 15;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
-		return boot_cpu_data.x86 == 15;
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return 1;
+		else
+			return (boot_cpu_data.x86 == 15);
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -211,6 +210,8 @@ int __init setup_nmi_watchdog(char *str)
 
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 
+static void disable_intel_arch_watchdog(void);
+
 static void disable_lapic_nmi_watchdog(void)
 static void disable_lapic_nmi_watchdog(void)
 {
 {
 	if (nmi_active <= 0)
 	if (nmi_active <= 0)
@@ -223,6 +224,8 @@ static void disable_lapic_nmi_watchdog(void)
 		if (boot_cpu_data.x86 == 15) {
 		if (boot_cpu_data.x86 == 15) {
 			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
 			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
 			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
 			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+		} else if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			disable_intel_arch_watchdog();
 		}
 		}
 		break;
 		break;
 	}
 	}
@@ -375,6 +378,53 @@ static void setup_k7_watchdog(void)
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
 }
 }
 
 
+static void disable_intel_arch_watchdog(void)
+{
+	unsigned ebx;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 */
+	ebx = cpuid_ebx(10);
+	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+}
+
+static int setup_intel_arch_watchdog(void)
+{
+	unsigned int evntsel;
+	unsigned ebx;
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 */
+	ebx = cpuid_ebx(10);
+	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return 0;
+
+	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+
+	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
+	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+
+	evntsel = ARCH_PERFMON_EVENTSEL_INT
+		| ARCH_PERFMON_EVENTSEL_OS
+		| ARCH_PERFMON_EVENTSEL_USR
+		| ARCH_PERFMON_NMI_EVENT_SEL
+		| ARCH_PERFMON_NMI_EVENT_UMASK;
+
+	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	wrmsrl(MSR_ARCH_PERFMON_PERFCTR0, -((u64)cpu_khz * 1000 / nmi_hz));
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	return 1;
+}
+
 
 
 static int setup_p4_watchdog(void)
 static int setup_p4_watchdog(void)
 {
 {
@@ -428,10 +478,16 @@ void setup_apic_nmi_watchdog(void)
 		setup_k7_watchdog();
 		setup_k7_watchdog();
 		break;
 		break;
 	case X86_VENDOR_INTEL:
 	case X86_VENDOR_INTEL:
-		if (boot_cpu_data.x86 != 15)
-			return;
-		if (!setup_p4_watchdog())
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+			if (!setup_intel_arch_watchdog())
+				return;
+		} else if (boot_cpu_data.x86 == 15) {
+			if (!setup_p4_watchdog())
+				return;
+		} else {
 			return;
 			return;
+		}
+
 		break;
 		break;
 
 
 	default:
 	default:
@@ -516,7 +572,14 @@ void __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
  			 */
  			 */
  			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
  			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
  			apic_write(APIC_LVTPC, APIC_DM_NMI);
  			apic_write(APIC_LVTPC, APIC_DM_NMI);
- 		}
+ 		} else if (nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+			/*
+			 * For Intel based architectural perfmon
+			 * - LVTPC is masked on interrupt and must be
+			 *   unmasked by the LVTPC handler.
+			 */
+			apic_write(APIC_LVTPC, APIC_DM_NMI);
+		}
 		wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
 		wrmsrl(nmi_perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
 	}
 	}
 }
 }

+ 1018 - 0
arch/x86_64/kernel/pci-calgary.c

@@ -0,0 +1,1018 @@
+/*
+ * Derived from arch/powerpc/kernel/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <asm/proto.h>
+#include <asm/calgary.h>
+#include <asm/tce.h>
+#include <asm/pci-direct.h>
+#include <asm/system.h>
+#include <asm/dma.h>
+
+#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
+#define PCI_VENDOR_DEVICE_ID_CALGARY \
+	(PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
+
+/* we need these for register space address calculation */
+#define START_ADDRESS           0xfe000000
+#define CHASSIS_BASE            0
+#define ONE_BASED_CHASSIS_NUM   1
+
+/* register offsets inside the host bridge space */
+#define PHB_CSR_OFFSET		0x0110
+#define PHB_PLSSR_OFFSET	0x0120
+#define PHB_CONFIG_RW_OFFSET	0x0160
+#define PHB_IOBASE_BAR_LOW	0x0170
+#define PHB_IOBASE_BAR_HIGH	0x0180
+#define PHB_MEM_1_LOW		0x0190
+#define PHB_MEM_1_HIGH		0x01A0
+#define PHB_IO_ADDR_SIZE	0x01B0
+#define PHB_MEM_1_SIZE		0x01C0
+#define PHB_MEM_ST_OFFSET	0x01D0
+#define PHB_AER_OFFSET		0x0200
+#define PHB_CONFIG_0_HIGH	0x0220
+#define PHB_CONFIG_0_LOW	0x0230
+#define PHB_CONFIG_0_END	0x0240
+#define PHB_MEM_2_LOW		0x02B0
+#define PHB_MEM_2_HIGH		0x02C0
+#define PHB_MEM_2_SIZE_HIGH	0x02D0
+#define PHB_MEM_2_SIZE_LOW	0x02E0
+#define PHB_DOSHOLE_OFFSET	0x08E0
+
+/* PHB_CONFIG_RW */
+#define PHB_TCE_ENABLE		0x20000000
+#define PHB_SLOT_DISABLE	0x1C000000
+#define PHB_DAC_DISABLE		0x01000000
+#define PHB_MEM2_ENABLE		0x00400000
+#define PHB_MCSR_ENABLE		0x00100000
+/* TAR (Table Address Register) */
+#define TAR_SW_BITS		0x0000ffffffff800fUL
+#define TAR_VALID		0x0000000000000008UL
+/* CSR (Channel/DMA Status Register) */
+#define CSR_AGENT_MASK		0xffe0ffff
+
+#define MAX_NUM_OF_PHBS		8 /* how many PHBs in total? */
+#define MAX_PHB_BUS_NUM		(MAX_NUM_OF_PHBS * 2) /* max dev->bus->number */
+#define PHBS_PER_CALGARY	4
+
+/* register offsets in Calgary's internal register space */
+static const unsigned long tar_offsets[] = {
+	0x0580 /* TAR0 */,
+	0x0588 /* TAR1 */,
+	0x0590 /* TAR2 */,
+	0x0598 /* TAR3 */
+};
+
+static const unsigned long split_queue_offsets[] = {
+	0x4870 /* SPLIT QUEUE 0 */,
+	0x5870 /* SPLIT QUEUE 1 */,
+	0x6870 /* SPLIT QUEUE 2 */,
+	0x7870 /* SPLIT QUEUE 3 */
+};
+
+static const unsigned long phb_offsets[] = {
+	0x8000 /* PHB0 */,
+	0x9000 /* PHB1 */,
+	0xA000 /* PHB2 */,
+	0xB000 /* PHB3 */
+};
+
+void* tce_table_kva[MAX_NUM_OF_PHBS * MAX_NUMNODES];
+unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
+static int translate_empty_slots __read_mostly = 0;
+static int calgary_detected __read_mostly = 0;
+
+/*
+ * the bitmap of PHBs the user requested that we disable
+ * translation on.
+ */
+static DECLARE_BITMAP(translation_disabled, MAX_NUMNODES * MAX_PHB_BUS_NUM);
+
+static void tce_cache_blast(struct iommu_table *tbl);
+
+/* enable this to stress test the chip's TCE cache */
+#ifdef CONFIG_IOMMU_DEBUG
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+	tce_cache_blast(tbl);
+}
+#else
+static inline void tce_cache_blast_stress(struct iommu_table *tbl)
+{
+}
+#endif /* BLAST_TCE_CACHE_ON_UNMAP */
+
+static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
+{
+	unsigned int npages;
+
+	npages = PAGE_ALIGN(dma + dmalen) - (dma & PAGE_MASK);
+	npages >>= PAGE_SHIFT;
+
+	return npages;
+}
+
+static inline int translate_phb(struct pci_dev* dev)
+{
+	int disabled = test_bit(dev->bus->number, translation_disabled);
+	return !disabled;
+}
+
+static void iommu_range_reserve(struct iommu_table *tbl,
+        unsigned long start_addr, unsigned int npages)
+{
+	unsigned long index;
+	unsigned long end;
+
+	index = start_addr >> PAGE_SHIFT;
+
+	/* bail out if we're asked to reserve a region we don't cover */
+	if (index >= tbl->it_size)
+		return;
+
+	end = index + npages;
+	if (end > tbl->it_size) /* don't go off the table */
+		end = tbl->it_size;
+
+	while (index < end) {
+		if (test_bit(index, tbl->it_map))
+			printk(KERN_ERR "Calgary: entry already allocated at "
+			       "0x%lx tbl %p dma 0x%lx npages %u\n",
+			       index, tbl, start_addr, npages);
+		++index;
+	}
+	set_bit_string(tbl->it_map, start_addr >> PAGE_SHIFT, npages);
+}
+
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
+	unsigned int npages)
+{
+	unsigned long offset;
+
+	BUG_ON(npages == 0);
+
+	offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
+				       tbl->it_size, npages);
+	if (offset == ~0UL) {
+		tce_cache_blast(tbl);
+		offset = find_next_zero_string(tbl->it_map, 0,
+					       tbl->it_size, npages);
+		if (offset == ~0UL) {
+			printk(KERN_WARNING "Calgary: IOMMU full.\n");
+			if (panic_on_overflow)
+				panic("Calgary: fix the allocator.\n");
+			else
+				return bad_dma_address;
+		}
+	}
+
+	set_bit_string(tbl->it_map, offset, npages);
+	tbl->it_hint = offset + npages;
+	BUG_ON(tbl->it_hint > tbl->it_size);
+
+	return offset;
+}
+
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
+	unsigned int npages, int direction)
+{
+	unsigned long entry, flags;
+	dma_addr_t ret = bad_dma_address;
+
+	spin_lock_irqsave(&tbl->it_lock, flags);
+
+	entry = iommu_range_alloc(tbl, npages);
+
+	if (unlikely(entry == bad_dma_address))
+		goto error;
+
+	/* set the return dma address */
+	ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
+
+	/* put the TCEs in the HW table */
+	tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
+		  direction);
+
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+	return ret;
+
+error:
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+	printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
+	       "iommu %p\n", npages, tbl);
+	return bad_dma_address;
+}
+
+static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+	unsigned int npages)
+{
+	unsigned long entry;
+	unsigned long i;
+
+	entry = dma_addr >> PAGE_SHIFT;
+
+	BUG_ON(entry + npages > tbl->it_size);
+
+	tce_free(tbl, entry, npages);
+
+	for (i = 0; i < npages; ++i) {
+		if (!test_bit(entry + i, tbl->it_map))
+			printk(KERN_ERR "Calgary: bit is off at 0x%lx "
+			       "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
+			       entry + i, tbl, dma_addr, entry, npages);
+	}
+
+	__clear_bit_string(tbl->it_map, entry, npages);
+
+	tce_cache_blast_stress(tbl);
+}
+
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+	unsigned int npages)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tbl->it_lock, flags);
+
+	__iommu_free(tbl, dma_addr, npages);
+
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static void __calgary_unmap_sg(struct iommu_table *tbl,
+	struct scatterlist *sglist, int nelems, int direction)
+{
+	while (nelems--) {
+		unsigned int npages;
+		dma_addr_t dma = sglist->dma_address;
+		unsigned int dmalen = sglist->dma_length;
+
+		if (dmalen == 0)
+			break;
+
+		npages = num_dma_pages(dma, dmalen);
+		__iommu_free(tbl, dma, npages);
+		sglist++;
+	}
+}
+
+void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
+		      int nelems, int direction)
+{
+	unsigned long flags;
+	struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+	if (!translate_phb(to_pci_dev(dev)))
+		return;
+
+	spin_lock_irqsave(&tbl->it_lock, flags);
+
+	__calgary_unmap_sg(tbl, sglist, nelems, direction);
+
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+}
+
+static int calgary_nontranslate_map_sg(struct device* dev,
+	struct scatterlist *sg, int nelems, int direction)
+{
+	int i;
+
+ 	for (i = 0; i < nelems; i++ ) {
+		struct scatterlist *s = &sg[i];
+		BUG_ON(!s->page);
+		s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
+		s->dma_length = s->length;
+	}
+	return nelems;
+}
+
+int calgary_map_sg(struct device *dev, struct scatterlist *sg,
+	int nelems, int direction)
+{
+	struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+	unsigned long flags;
+	unsigned long vaddr;
+	unsigned int npages;
+	unsigned long entry;
+	int i;
+
+	if (!translate_phb(to_pci_dev(dev)))
+		return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
+
+	spin_lock_irqsave(&tbl->it_lock, flags);
+
+	for (i = 0; i < nelems; i++ ) {
+		struct scatterlist *s = &sg[i];
+		BUG_ON(!s->page);
+
+		vaddr = (unsigned long)page_address(s->page) + s->offset;
+		npages = num_dma_pages(vaddr, s->length);
+
+		entry = iommu_range_alloc(tbl, npages);
+		if (entry == bad_dma_address) {
+			/* makes sure unmap knows to stop */
+			s->dma_length = 0;
+			goto error;
+		}
+
+		s->dma_address = (entry << PAGE_SHIFT) | s->offset;
+
+		/* insert into HW table */
+		tce_build(tbl, entry, npages, vaddr & PAGE_MASK,
+			  direction);
+
+		s->dma_length = s->length;
+	}
+
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+
+	return nelems;
+error:
+	__calgary_unmap_sg(tbl, sg, nelems, direction);
+	for (i = 0; i < nelems; i++) {
+		sg[i].dma_address = bad_dma_address;
+		sg[i].dma_length = 0;
+	}
+	spin_unlock_irqrestore(&tbl->it_lock, flags);
+	return 0;
+}
+
+dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+	size_t size, int direction)
+{
+	dma_addr_t dma_handle = bad_dma_address;
+	unsigned long uaddr;
+	unsigned int npages;
+	struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+	uaddr = (unsigned long)vaddr;
+	npages = num_dma_pages(uaddr, size);
+
+	if (translate_phb(to_pci_dev(dev)))
+		dma_handle = iommu_alloc(tbl, vaddr, npages, direction);
+	else
+		dma_handle = virt_to_bus(vaddr);
+
+	return dma_handle;
+}
+
+void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
+	size_t size, int direction)
+{
+	struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+	unsigned int npages;
+
+	if (!translate_phb(to_pci_dev(dev)))
+		return;
+
+	npages = num_dma_pages(dma_handle, size);
+	iommu_free(tbl, dma_handle, npages);
+}
+
+void* calgary_alloc_coherent(struct device *dev, size_t size,
+	dma_addr_t *dma_handle, gfp_t flag)
+{
+	void *ret = NULL;
+	dma_addr_t mapping;
+	unsigned int npages, order;
+	struct iommu_table *tbl;
+
+	tbl = to_pci_dev(dev)->bus->self->sysdata;
+
+	size = PAGE_ALIGN(size); /* size rounded up to full pages */
+	npages = size >> PAGE_SHIFT;
+	order = get_order(size);
+
+	/* alloc enough pages (and possibly more) */
+	ret = (void *)__get_free_pages(flag, order);
+	if (!ret)
+		goto error;
+	memset(ret, 0, size);
+
+	if (translate_phb(to_pci_dev(dev))) {
+		/* set up tces to cover the allocated range */
+		mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL);
+		if (mapping == bad_dma_address)
+			goto free;
+
+		*dma_handle = mapping;
+	} else /* non translated slot */
+		*dma_handle = virt_to_bus(ret);
+
+	return ret;
+
+free:
+	free_pages((unsigned long)ret, get_order(size));
+	ret = NULL;
+error:
+	return ret;
+}
+
+static struct dma_mapping_ops calgary_dma_ops = {
+	.alloc_coherent = calgary_alloc_coherent,
+	.map_single = calgary_map_single,
+	.unmap_single = calgary_unmap_single,
+	.map_sg = calgary_map_sg,
+	.unmap_sg = calgary_unmap_sg,
+};
+
+static inline int busno_to_phbid(unsigned char num)
+{
+	return bus_to_phb(num) % PHBS_PER_CALGARY;
+}
+
+static inline unsigned long split_queue_offset(unsigned char num)
+{
+	size_t idx = busno_to_phbid(num);
+
+	return split_queue_offsets[idx];
+}
+
+static inline unsigned long tar_offset(unsigned char num)
+{
+	size_t idx = busno_to_phbid(num);
+
+	return tar_offsets[idx];
+}
+
+static inline unsigned long phb_offset(unsigned char num)
+{
+	size_t idx = busno_to_phbid(num);
+
+	return phb_offsets[idx];
+}
+
+static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
+{
+	unsigned long target = ((unsigned long)bar) | offset;
+	return (void __iomem*)target;
+}
+
+static void tce_cache_blast(struct iommu_table *tbl)
+{
+	u64 val;
+	u32 aer;
+	int i = 0;
+	void __iomem *bbar = tbl->bbar;
+	void __iomem *target;
+
+	/* disable arbitration on the bus */
+	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+	aer = readl(target);
+	writel(0, target);
+
+	/* read plssr to ensure it got there */
+	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
+	val = readl(target);
+
+	/* poll split queues until all DMA activity is done */
+	target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
+	do {
+		val = readq(target);
+		i++;
+	} while ((val & 0xff) != 0xff && i < 100);
+	if (i == 100)
+		printk(KERN_WARNING "Calgary: PCI bus not quiesced, "
+		       "continuing anyway\n");
+
+	/* invalidate TCE cache */
+	target = calgary_reg(bbar, tar_offset(tbl->it_busno));
+	writeq(tbl->tar_val, target);
+
+	/* enable arbitration */
+	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
+	writel(aer, target);
+	(void)readl(target); /* flush */
+}
+
+static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
+	u64 limit)
+{
+	unsigned int numpages;
+
+	limit = limit | 0xfffff;
+	limit++;
+
+	numpages = ((limit - start) >> PAGE_SHIFT);
+	iommu_range_reserve(dev->sysdata, start, numpages);
+}
+
+static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
+{
+	void __iomem *target;
+	u64 low, high, sizelow;
+	u64 start, limit;
+	struct iommu_table *tbl = dev->sysdata;
+	unsigned char busnum = dev->bus->number;
+	void __iomem *bbar = tbl->bbar;
+
+	/* peripheral MEM_1 region */
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
+	low = be32_to_cpu(readl(target));
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
+	high = be32_to_cpu(readl(target));
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
+	sizelow = be32_to_cpu(readl(target));
+
+	start = (high << 32) | low;
+	limit = sizelow;
+
+	calgary_reserve_mem_region(dev, start, limit);
+}
+
+static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
+{
+	void __iomem *target;
+	u32 val32;
+	u64 low, high, sizelow, sizehigh;
+	u64 start, limit;
+	struct iommu_table *tbl = dev->sysdata;
+	unsigned char busnum = dev->bus->number;
+	void __iomem *bbar = tbl->bbar;
+
+	/* is it enabled? */
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+	val32 = be32_to_cpu(readl(target));
+	if (!(val32 & PHB_MEM2_ENABLE))
+		return;
+
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
+	low = be32_to_cpu(readl(target));
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
+	high = be32_to_cpu(readl(target));
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
+	sizelow = be32_to_cpu(readl(target));
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
+	sizehigh = be32_to_cpu(readl(target));
+
+	start = (high << 32) | low;
+	limit = (sizehigh << 32) | sizelow;
+
+	calgary_reserve_mem_region(dev, start, limit);
+}
+
+/*
+ * some regions of the IO address space do not get translated, so we
+ * must not give devices IO addresses in those regions. The regions
+ * are the 640KB-1MB region and the two PCI peripheral memory holes.
+ * Reserve all of them in the IOMMU bitmap to avoid giving them out
+ * later.
+ */
+static void __init calgary_reserve_regions(struct pci_dev *dev)
+{
+	unsigned int npages;
+	void __iomem *bbar;
+	unsigned char busnum;
+	u64 start;
+	struct iommu_table *tbl = dev->sysdata;
+
+	bbar = tbl->bbar;
+	busnum = dev->bus->number;
+
+	/* reserve bad_dma_address in case it's a legal address */
+	iommu_range_reserve(tbl, bad_dma_address, 1);
+
+	/* avoid the BIOS/VGA first 640KB-1MB region */
+	start = (640 * 1024);
+	npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
+	iommu_range_reserve(tbl, start, npages);
+
+	/* reserve the two PCI peripheral memory regions in IO space */
+	calgary_reserve_peripheral_mem_1(dev);
+	calgary_reserve_peripheral_mem_2(dev);
+}
+
+static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
+{
+	u64 val64;
+	u64 table_phys;
+	void __iomem *target;
+	int ret;
+	struct iommu_table *tbl;
+
+	/* build TCE tables for each PHB */
+	ret = build_tce_table(dev, bbar);
+	if (ret)
+		return ret;
+
+	calgary_reserve_regions(dev);
+
+	/* set TARs for each PHB */
+	target = calgary_reg(bbar, tar_offset(dev->bus->number));
+	val64 = be64_to_cpu(readq(target));
+
+	/* zero out all TAR bits under sw control */
+	val64 &= ~TAR_SW_BITS;
+
+	tbl = dev->sysdata;
+	table_phys = (u64)__pa(tbl->it_base);
+	val64 |= table_phys;
+
+	BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
+	val64 |= (u64) specified_table_size;
+
+	tbl->tar_val = cpu_to_be64(val64);
+	writeq(tbl->tar_val, target);
+	readq(target); /* flush */
+
+	return 0;
+}
+
+static void __init calgary_free_tar(struct pci_dev *dev)
+{
+	u64 val64;
+	struct iommu_table *tbl = dev->sysdata;
+	void __iomem *target;
+
+	target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
+	val64 = be64_to_cpu(readq(target));
+	val64 &= ~TAR_SW_BITS;
+	writeq(cpu_to_be64(val64), target);
+	readq(target); /* flush */
+
+	kfree(tbl);
+	dev->sysdata = NULL;
+}
+
+static void calgary_watchdog(unsigned long data)
+{
+	struct pci_dev *dev = (struct pci_dev *)data;
+	struct iommu_table *tbl = dev->sysdata;
+	void __iomem *bbar = tbl->bbar;
+	u32 val32;
+	void __iomem *target;
+
+	target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
+	val32 = be32_to_cpu(readl(target));
+
+	/* If no error, the agent ID in the CSR is not valid */
+	if (val32 & CSR_AGENT_MASK) {
+		printk(KERN_EMERG "calgary_watchdog: DMA error on bus %d, "
+				  "CSR = %#x\n", dev->bus->number, val32);
+		writel(0, target);
+
+		/* Disable bus that caused the error */
+		target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
+					   PHB_CONFIG_RW_OFFSET);
+		val32 = be32_to_cpu(readl(target));
+		val32 |= PHB_SLOT_DISABLE;
+		writel(cpu_to_be32(val32), target);
+		readl(target); /* flush */
+	} else {
+		/* Reset the timer */
+		mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
+	}
+}
+
+static void __init calgary_enable_translation(struct pci_dev *dev)
+{
+	u32 val32;
+	unsigned char busnum;
+	void __iomem *target;
+	void __iomem *bbar;
+	struct iommu_table *tbl;
+
+	busnum = dev->bus->number;
+	tbl = dev->sysdata;
+	bbar = tbl->bbar;
+
+	/* enable TCE in PHB Config Register */
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+	val32 = be32_to_cpu(readl(target));
+	val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
+
+	printk(KERN_INFO "Calgary: enabling translation on PHB %d\n", busnum);
+	printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
+	       "bus.\n");
+
+	writel(cpu_to_be32(val32), target);
+	readl(target); /* flush */
+
+	init_timer(&tbl->watchdog_timer);
+	tbl->watchdog_timer.function = &calgary_watchdog;
+	tbl->watchdog_timer.data = (unsigned long)dev;
+	mod_timer(&tbl->watchdog_timer, jiffies);
+}
+
+static void __init calgary_disable_translation(struct pci_dev *dev)
+{
+	u32 val32;
+	unsigned char busnum;
+	void __iomem *target;
+	void __iomem *bbar;
+	struct iommu_table *tbl;
+
+	busnum = dev->bus->number;
+	tbl = dev->sysdata;
+	bbar = tbl->bbar;
+
+	/* disable TCE in PHB Config Register */
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
+	val32 = be32_to_cpu(readl(target));
+	val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
+
+	printk(KERN_INFO "Calgary: disabling translation on PHB %d!\n", busnum);
+	writel(cpu_to_be32(val32), target);
+	readl(target); /* flush */
+
+	del_timer_sync(&tbl->watchdog_timer);
+}
+
+static inline unsigned int __init locate_register_space(struct pci_dev *dev)
+{
+	int rionodeid;
+	u32 address;
+
+	rionodeid = (dev->bus->number % 15 > 4) ? 3 : 2;
+	/*
+	 * register space address calculation as follows:
+	 * FE0MB-8MB*OneBasedChassisNumber+1MB*(RioNodeId-ChassisBase)
+	 * ChassisBase is always zero for x366/x260/x460
+	 * RioNodeId is 2 for first Calgary, 3 for second Calgary
+	 */
+	address = START_ADDRESS	-
+		(0x800000 * (ONE_BASED_CHASSIS_NUM + dev->bus->number / 15)) +
+		(0x100000) * (rionodeid - CHASSIS_BASE);
+	return address;
+}
+
+static int __init calgary_init_one_nontraslated(struct pci_dev *dev)
+{
+	dev->sysdata = NULL;
+	dev->bus->self = dev;
+
+	return 0;
+}
+
+static int __init calgary_init_one(struct pci_dev *dev)
+{
+	u32 address;
+	void __iomem *bbar;
+	int ret;
+
+	address = locate_register_space(dev);
+	/* map entire 1MB of Calgary config space */
+	bbar = ioremap_nocache(address, 1024 * 1024);
+	if (!bbar) {
+		ret = -ENODATA;
+		goto done;
+	}
+
+	ret = calgary_setup_tar(dev, bbar);
+	if (ret)
+		goto iounmap;
+
+	dev->bus->self = dev;
+	calgary_enable_translation(dev);
+
+	return 0;
+
+iounmap:
+	iounmap(bbar);
+done:
+	return ret;
+}
+
+static int __init calgary_init(void)
+{
+	int i, ret = -ENODEV;
+	struct pci_dev *dev = NULL;
+
+	for (i = 0; i <= num_online_nodes() * MAX_NUM_OF_PHBS; i++) {
+		dev = pci_get_device(PCI_VENDOR_ID_IBM,
+				     PCI_DEVICE_ID_IBM_CALGARY,
+				     dev);
+		if (!dev)
+			break;
+		if (!translate_phb(dev)) {
+			calgary_init_one_nontraslated(dev);
+			continue;
+		}
+		if (!tce_table_kva[i] && !translate_empty_slots) {
+			pci_dev_put(dev);
+			continue;
+		}
+		ret = calgary_init_one(dev);
+		if (ret)
+			goto error;
+	}
+
+	return ret;
+
+error:
+	for (i--; i >= 0; i--) {
+		dev = pci_find_device_reverse(PCI_VENDOR_ID_IBM,
+					      PCI_DEVICE_ID_IBM_CALGARY,
+					      dev);
+		if (!translate_phb(dev)) {
+			pci_dev_put(dev);
+			continue;
+		}
+		if (!tce_table_kva[i] && !translate_empty_slots)
+			continue;
+		calgary_disable_translation(dev);
+		calgary_free_tar(dev);
+		pci_dev_put(dev);
+	}
+
+	return ret;
+}
+
+static inline int __init determine_tce_table_size(u64 ram)
+{
+	int ret;
+
+	if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
+		return specified_table_size;
+
+	/*
+	 * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
+	 * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
+	 * larger table size has twice as many entries, so shift the
+	 * max ram address by 13 to divide by 8K and then look at the
+	 * order of the result to choose between 0-7.
+	 */
+	ret = get_order(ram >> 13);
+	if (ret > TCE_TABLE_SIZE_8M)
+		ret = TCE_TABLE_SIZE_8M;
+
+	return ret;
+}
+
+void __init detect_calgary(void)
+{
+	u32 val;
+	int bus, table_idx;
+	void *tbl;
+	int detected = 0;
+
+	/*
+	 * if the user specified iommu=off or iommu=soft or we found
+	 * another HW IOMMU already, bail out.
+	 */
+	if (swiotlb || no_iommu || iommu_detected)
+		return;
+
+	specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
+
+	for (bus = 0, table_idx = 0;
+	     bus <= num_online_nodes() * MAX_PHB_BUS_NUM;
+	     bus++) {
+		BUG_ON(bus > MAX_NUMNODES * MAX_PHB_BUS_NUM);
+		if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
+			continue;
+		if (test_bit(bus, translation_disabled)) {
+			printk(KERN_INFO "Calgary: translation is disabled for "
+			       "PHB 0x%x\n", bus);
+			/* skip this phb, don't allocate a tbl for it */
+			tce_table_kva[table_idx] = NULL;
+			table_idx++;
+			continue;
+		}
+		/*
+		 * scan the first slot of the PCI bus to see if there
+		 * are any devices present
+		 */
+		val = read_pci_config(bus, 1, 0, 0);
+		if (val != 0xffffffff || translate_empty_slots) {
+			tbl = alloc_tce_table();
+			if (!tbl)
+				goto cleanup;
+			detected = 1;
+		} else
+			tbl = NULL;
+
+		tce_table_kva[table_idx] = tbl;
+		table_idx++;
+	}
+
+	if (detected) {
+		iommu_detected = 1;
+		calgary_detected = 1;
+		printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected. "
+		       "TCE table spec is %d.\n", specified_table_size);
+	}
+	return;
+
+cleanup:
+	for (--table_idx; table_idx >= 0; --table_idx)
+		if (tce_table_kva[table_idx])
+			free_tce_table(tce_table_kva[table_idx]);
+}
+
+int __init calgary_iommu_init(void)
+{
+	int ret;
+
+	if (no_iommu || swiotlb)
+		return -ENODEV;
+
+	if (!calgary_detected)
+		return -ENODEV;
+
+	/* ok, we're trying to use Calgary - let's roll */
+	printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
+
+	ret = calgary_init();
+	if (ret) {
+		printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
+		       "falling back to no_iommu\n", ret);
+		if (end_pfn > MAX_DMA32_PFN)
+			printk(KERN_ERR "WARNING more than 4GB of memory, "
+					"32bit PCI may malfunction.\n");
+		return ret;
+	}
+
+	force_iommu = 1;
+	dma_ops = &calgary_dma_ops;
+
+	return 0;
+}
+
+static int __init calgary_parse_options(char *p)
+{
+	unsigned int bridge;
+	size_t len;
+	char* endp;
+
+	while (*p) {
+		if (!strncmp(p, "64k", 3))
+			specified_table_size = TCE_TABLE_SIZE_64K;
+		else if (!strncmp(p, "128k", 4))
+			specified_table_size = TCE_TABLE_SIZE_128K;
+		else if (!strncmp(p, "256k", 4))
+			specified_table_size = TCE_TABLE_SIZE_256K;
+		else if (!strncmp(p, "512k", 4))
+			specified_table_size = TCE_TABLE_SIZE_512K;
+		else if (!strncmp(p, "1M", 2))
+			specified_table_size = TCE_TABLE_SIZE_1M;
+		else if (!strncmp(p, "2M", 2))
+			specified_table_size = TCE_TABLE_SIZE_2M;
+		else if (!strncmp(p, "4M", 2))
+			specified_table_size = TCE_TABLE_SIZE_4M;
+		else if (!strncmp(p, "8M", 2))
+			specified_table_size = TCE_TABLE_SIZE_8M;
+
+		len = strlen("translate_empty_slots");
+		if (!strncmp(p, "translate_empty_slots", len))
+			translate_empty_slots = 1;
+
+		len = strlen("disable");
+		if (!strncmp(p, "disable", len)) {
+			p += len;
+			if (*p == '=')
+				++p;
+			if (*p == '\0')
+				break;
+			bridge = simple_strtol(p, &endp, 0);
+			if (p == endp)
+				break;
+
+			if (bridge <= (num_online_nodes() * MAX_PHB_BUS_NUM)) {
+				printk(KERN_INFO "Calgary: disabling "
+				       "translation for PHB 0x%x\n", bridge);
+				set_bit(bridge, translation_disabled);
+			}
+		}
+
+		p = strpbrk(p, ",");
+		if (!p)
+			break;
+
+		p++; /* skip ',' */
+	}
+	return 1;
+}
+__setup("calgary=", calgary_parse_options);

+ 48 - 7
arch/x86_64/kernel/pci-dma.c

@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/module.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/proto.h>
 #include <asm/proto.h>
+#include <asm/calgary.h>
 
 
 int iommu_merge __read_mostly = 0;
 int iommu_merge __read_mostly = 0;
 EXPORT_SYMBOL(iommu_merge);
 EXPORT_SYMBOL(iommu_merge);
@@ -33,12 +34,15 @@ int panic_on_overflow __read_mostly = 0;
 int force_iommu __read_mostly= 0;
 int force_iommu __read_mostly= 0;
 #endif
 #endif
 
 
+/* Set this to 1 if there is a HW IOMMU in the system */
+int iommu_detected __read_mostly = 0;
+
 /* Dummy device used for NULL arguments (normally ISA). Better would
 /* Dummy device used for NULL arguments (normally ISA). Better would
    be probably a smaller DMA mask, but this is bug-to-bug compatible
    be probably a smaller DMA mask, but this is bug-to-bug compatible
    to i386. */
    to i386. */
 struct device fallback_dev = {
 struct device fallback_dev = {
 	.bus_id = "fallback device",
 	.bus_id = "fallback device",
-	.coherent_dma_mask = 0xffffffff,
+	.coherent_dma_mask = DMA_32BIT_MASK,
 	.dma_mask = &fallback_dev.coherent_dma_mask,
 	.dma_mask = &fallback_dev.coherent_dma_mask,
 };
 };
 
 
@@ -77,7 +81,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		dev = &fallback_dev;
 		dev = &fallback_dev;
 	dma_mask = dev->coherent_dma_mask;
 	dma_mask = dev->coherent_dma_mask;
 	if (dma_mask == 0)
 	if (dma_mask == 0)
-		dma_mask = 0xffffffff;
+		dma_mask = DMA_32BIT_MASK;
 
 
 	/* Don't invoke OOM killer */
 	/* Don't invoke OOM killer */
 	gfp |= __GFP_NORETRY;
 	gfp |= __GFP_NORETRY;
@@ -90,7 +94,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	   larger than 16MB and in this case we have a chance of
 	   larger than 16MB and in this case we have a chance of
 	   finding fitting memory in the next higher zone first. If
 	   finding fitting memory in the next higher zone first. If
 	   not retry with true GFP_DMA. -AK */
 	   not retry with true GFP_DMA. -AK */
-	if (dma_mask <= 0xffffffff)
+	if (dma_mask <= DMA_32BIT_MASK)
 		gfp |= GFP_DMA32;
 		gfp |= GFP_DMA32;
 
 
  again:
  again:
@@ -111,7 +115,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 
 			/* Don't use the 16MB ZONE_DMA unless absolutely
 			/* Don't use the 16MB ZONE_DMA unless absolutely
 			   needed. It's better to use remapping first. */
 			   needed. It's better to use remapping first. */
-			if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) {
+			if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
 				gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
 				gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
 				goto again;
 				goto again;
 			}
 			}
@@ -174,7 +178,7 @@ int dma_supported(struct device *dev, u64 mask)
 	/* Copied from i386. Doesn't make much sense, because it will
 	/* Copied from i386. Doesn't make much sense, because it will
 	   only work for pci_alloc_coherent.
 	   only work for pci_alloc_coherent.
 	   The caller just has to use GFP_DMA in this case. */
 	   The caller just has to use GFP_DMA in this case. */
-        if (mask < 0x00ffffff)
+        if (mask < DMA_24BIT_MASK)
                 return 0;
                 return 0;
 
 
 	/* Tell the device to use SAC when IOMMU force is on.  This
 	/* Tell the device to use SAC when IOMMU force is on.  This
@@ -189,7 +193,7 @@ int dma_supported(struct device *dev, u64 mask)
 	   SAC for these.  Assume all masks <= 40 bits are of this
 	   SAC for these.  Assume all masks <= 40 bits are of this
 	   type. Normally this doesn't make any difference, but gives
 	   type. Normally this doesn't make any difference, but gives
 	   more gentle handling of IOMMU overflow. */
 	   more gentle handling of IOMMU overflow. */
-	if (iommu_sac_force && (mask >= 0xffffffffffULL)) {
+	if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
 		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
 		printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask);
 		return 0;
 		return 0;
 	}
 	}
@@ -266,7 +270,7 @@ __init int iommu_setup(char *p)
 		    swiotlb = 1;
 		    swiotlb = 1;
 #endif
 #endif
 
 
-#ifdef CONFIG_GART_IOMMU
+#ifdef CONFIG_IOMMU
 	    gart_parse_options(p);
 	    gart_parse_options(p);
 #endif
 #endif
 
 
@@ -276,3 +280,40 @@ __init int iommu_setup(char *p)
     }
     }
     return 1;
     return 1;
 }
 }
+__setup("iommu=", iommu_setup);
+
+void __init pci_iommu_alloc(void)
+{
+	/*
+	 * The order of these functions is important for
+	 * fall-back/fail-over reasons
+	 */
+#ifdef CONFIG_IOMMU
+	iommu_hole_init();
+#endif
+
+#ifdef CONFIG_CALGARY_IOMMU
+	detect_calgary();
+#endif
+
+#ifdef CONFIG_SWIOTLB
+	pci_swiotlb_init();
+#endif
+}
+
+static int __init pci_iommu_init(void)
+{
+#ifdef CONFIG_CALGARY_IOMMU
+	calgary_iommu_init();
+#endif
+
+#ifdef CONFIG_IOMMU
+	gart_iommu_init();
+#endif
+
+	no_iommu_init();
+	return 0;
+}
+
+/* Must execute after PCI subsystem */
+fs_initcall(pci_iommu_init);

+ 54 - 101
arch/x86_64/kernel/pci-gart.c

@@ -32,6 +32,7 @@
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
 #include <asm/swiotlb.h>
 #include <asm/swiotlb.h>
 #include <asm/dma.h>
 #include <asm/dma.h>
+#include <asm/k8.h>
 
 
 unsigned long iommu_bus_base;	/* GART remapping area (physical) */
 unsigned long iommu_bus_base;	/* GART remapping area (physical) */
 static unsigned long iommu_size; 	/* size of remapping area bytes */
 static unsigned long iommu_size; 	/* size of remapping area bytes */
@@ -46,8 +47,6 @@ u32 *iommu_gatt_base; 		/* Remapping table */
    also seen with Qlogic at least). */
    also seen with Qlogic at least). */
 int iommu_fullflush = 1;
 int iommu_fullflush = 1;
 
 
-#define MAX_NB 8
-
 /* Allocation bitmap for the remapping area */ 
 /* Allocation bitmap for the remapping area */ 
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
 static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
 static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
@@ -63,13 +62,6 @@ static u32 gart_unmapped_entry;
 #define to_pages(addr,size) \
 #define to_pages(addr,size) \
 	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
 
 
-#define for_all_nb(dev) \
-	dev = NULL;	\
-	while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)
-
-static struct pci_dev *northbridges[MAX_NB];
-static u32 northbridge_flush_word[MAX_NB];
-
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
 #define EMERGENCY_PAGES 32 /* = 128KB */ 
 
 
 #ifdef CONFIG_AGP
 #ifdef CONFIG_AGP
@@ -93,7 +85,7 @@ static unsigned long alloc_iommu(int size)
 	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
 	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
 	if (offset == -1) {
 	if (offset == -1) {
 		need_flush = 1;
 		need_flush = 1;
-	       	offset = find_next_zero_string(iommu_gart_bitmap,0,next_bit,size);
+		offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
 	}
 	}
 	if (offset != -1) { 
 	if (offset != -1) { 
 		set_bit_string(iommu_gart_bitmap, offset, size); 
 		set_bit_string(iommu_gart_bitmap, offset, size); 
@@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size)
 /* 
 /* 
  * Use global flush state to avoid races with multiple flushers.
  * Use global flush state to avoid races with multiple flushers.
  */
  */
-static void flush_gart(struct device *dev)
+static void flush_gart(void)
 { 
 { 
 	unsigned long flags;
 	unsigned long flags;
-	int flushed = 0;
-	int i, max;
-
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	if (need_flush) { 
-		max = 0;
-		for (i = 0; i < MAX_NB; i++) {
-			if (!northbridges[i]) 
-				continue;
-			pci_write_config_dword(northbridges[i], 0x9c, 
-					       northbridge_flush_word[i] | 1); 
-			flushed++;
-			max = i;
-		}
-		for (i = 0; i <= max; i++) {
-			u32 w;
-			if (!northbridges[i])
-				continue;
-			/* Make sure the hardware actually executed the flush. */
-			for (;;) { 
-				pci_read_config_dword(northbridges[i], 0x9c, &w);
-				if (!(w & 1))
-					break;
-				cpu_relax();
-			}
-		} 
-		if (!flushed) 
-			printk("nothing to flush?\n");
+	if (need_flush) {
+		k8_flush_garts();
 		need_flush = 0;
 		need_flush = 0;
 	} 
 	} 
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 } 
 } 
 
 
-
-
 #ifdef CONFIG_IOMMU_LEAK
 #ifdef CONFIG_IOMMU_LEAK
 
 
 #define SET_LEAK(x) if (iommu_leak_tab) \
 #define SET_LEAK(x) if (iommu_leak_tab) \
@@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
 				 size_t size, int dir)
 				 size_t size, int dir)
 {
 {
 	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
 	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
-	flush_gart(dev);
+	flush_gart();
 	return map;
 	return map;
 }
 }
 
 
@@ -288,6 +253,28 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
 	return bus; 
 	return bus; 
 }
 }
 
 
+/*
+ * Free a DMA mapping.
+ */
+void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
+		      size_t size, int direction)
+{
+	unsigned long iommu_page;
+	int npages;
+	int i;
+
+	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
+	    dma_addr >= iommu_bus_base + iommu_size)
+		return;
+	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
+	npages = to_pages(dma_addr, size);
+	for (i = 0; i < npages; i++) {
+		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
+		CLEAR_LEAK(iommu_page + i);
+	}
+	free_iommu(iommu_page, npages);
+}
+
 /*
 /*
  * Wrapper for pci_unmap_single working with scatterlists.
  * Wrapper for pci_unmap_single working with scatterlists.
  */
  */
@@ -299,7 +286,7 @@ void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int di
 		struct scatterlist *s = &sg[i];
 		struct scatterlist *s = &sg[i];
 		if (!s->dma_length || !s->length)
 		if (!s->dma_length || !s->length)
 			break;
 			break;
-		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
+		gart_unmap_single(dev, s->dma_address, s->dma_length, dir);
 	}
 	}
 }
 }
 
 
@@ -329,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 		s->dma_address = addr;
 		s->dma_address = addr;
 		s->dma_length = s->length;
 		s->dma_length = s->length;
 	}
 	}
-	flush_gart(dev);
+	flush_gart();
 	return nents;
 	return nents;
 }
 }
 
 
@@ -436,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
 	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
 		goto error;
 		goto error;
 	out++;
 	out++;
-	flush_gart(dev);
+	flush_gart();
 	if (out < nents) 
 	if (out < nents) 
 		sg[out].dma_length = 0; 
 		sg[out].dma_length = 0; 
 	return out;
 	return out;
 
 
 error:
 error:
-	flush_gart(NULL);
+	flush_gart();
 	gart_unmap_sg(dev, sg, nents, dir);
 	gart_unmap_sg(dev, sg, nents, dir);
 	/* When it was forced or merged try again in a dumb way */
 	/* When it was forced or merged try again in a dumb way */
 	if (force_iommu || iommu_merge) {
 	if (force_iommu || iommu_merge) {
@@ -458,28 +445,6 @@ error:
 	return 0;
 	return 0;
 } 
 } 
 
 
-/*
- * Free a DMA mapping.
- */ 
-void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
-		      size_t size, int direction)
-{
-	unsigned long iommu_page; 
-	int npages;
-	int i;
-
-	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || 
-	    dma_addr >= iommu_bus_base + iommu_size)
-		return;
-	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;	
-	npages = to_pages(dma_addr, size);
-	for (i = 0; i < npages; i++) { 
-		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; 
-		CLEAR_LEAK(iommu_page + i);
-	}
-	free_iommu(iommu_page, npages);
-}
-
 static int no_agp;
 static int no_agp;
 
 
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
@@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 	void *gatt;
 	void *gatt;
 	unsigned aper_base, new_aper_base;
 	unsigned aper_base, new_aper_base;
 	unsigned aper_size, gatt_size, new_aper_size;
 	unsigned aper_size, gatt_size, new_aper_size;
-	
+	int i;
+
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
 	aper_size = aper_base = info->aper_size = 0;
-	for_all_nb(dev) { 
+	dev = NULL;
+	for (i = 0; i < num_k8_northbridges; i++) {
+		dev = k8_northbridges[i];
 		new_aper_base = read_aperture(dev, &new_aper_size); 
 		new_aper_base = read_aperture(dev, &new_aper_size); 
 		if (!new_aper_base) 
 		if (!new_aper_base) 
 			goto nommu; 
 			goto nommu; 
@@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 		panic("Cannot allocate GATT table"); 
 		panic("Cannot allocate GATT table"); 
 	memset(gatt, 0, gatt_size); 
 	memset(gatt, 0, gatt_size); 
 	agp_gatt_table = gatt;
 	agp_gatt_table = gatt;
-	
-	for_all_nb(dev) { 
+
+	for (i = 0; i < num_k8_northbridges; i++) {
 		u32 ctl; 
 		u32 ctl; 
 		u32 gatt_reg; 
 		u32 gatt_reg; 
 
 
+		dev = k8_northbridges[i];
 		gatt_reg = __pa(gatt) >> 12; 
 		gatt_reg = __pa(gatt) >> 12; 
 		gatt_reg <<= 4; 
 		gatt_reg <<= 4; 
 		pci_write_config_dword(dev, 0x98, gatt_reg);
 		pci_write_config_dword(dev, 0x98, gatt_reg);
@@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
 
 		pci_write_config_dword(dev, 0x90, ctl); 
 		pci_write_config_dword(dev, 0x90, ctl); 
 	}
 	}
-	flush_gart(NULL); 
+	flush_gart();
 	
 	
 	printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 
 	printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); 
 	return 0;
 	return 0;
@@ -602,15 +571,19 @@ static struct dma_mapping_ops gart_dma_ops = {
 	.unmap_sg = gart_unmap_sg,
 	.unmap_sg = gart_unmap_sg,
 };
 };
 
 
-static int __init pci_iommu_init(void)
+void __init gart_iommu_init(void)
 { 
 { 
 	struct agp_kern_info info;
 	struct agp_kern_info info;
 	unsigned long aper_size;
 	unsigned long aper_size;
 	unsigned long iommu_start;
 	unsigned long iommu_start;
-	struct pci_dev *dev;
 	unsigned long scratch;
 	unsigned long scratch;
 	long i;
 	long i;
 
 
+	if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) {
+		printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n");
+		return;
+	}
+
 #ifndef CONFIG_AGP_AMD64
 #ifndef CONFIG_AGP_AMD64
 	no_agp = 1; 
 	no_agp = 1; 
 #else
 #else
@@ -622,7 +595,11 @@ static int __init pci_iommu_init(void)
 #endif	
 #endif	
 
 
 	if (swiotlb)
 	if (swiotlb)
-		return -1; 
+		return;
+
+	/* Did we detect a different HW IOMMU? */
+	if (iommu_detected && !iommu_aperture)
+		return;
 
 
 	if (no_iommu ||
 	if (no_iommu ||
 	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
 	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
@@ -634,15 +611,7 @@ static int __init pci_iommu_init(void)
 					"but IOMMU not available.\n"
 					"but IOMMU not available.\n"
 			       KERN_ERR "WARNING 32bit PCI may malfunction.\n");
 			       KERN_ERR "WARNING 32bit PCI may malfunction.\n");
 		}
 		}
-		return -1;
-	}
-
-	i = 0;
-	for_all_nb(dev)
-		i++;
-	if (i > MAX_NB) {
-		printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i);
-		return -1;
+		return;
 	}
 	}
 
 
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
 	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
@@ -707,26 +676,10 @@ static int __init pci_iommu_init(void)
 	for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 
 	for (i = EMERGENCY_PAGES; i < iommu_pages; i++) 
 		iommu_gatt_base[i] = gart_unmapped_entry;
 		iommu_gatt_base[i] = gart_unmapped_entry;
 
 
-	for_all_nb(dev) {
-		u32 flag; 
-		int cpu = PCI_SLOT(dev->devfn) - 24;
-		if (cpu >= MAX_NB)
-			continue;
-		northbridges[cpu] = dev;
-		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
-		northbridge_flush_word[cpu] = flag; 
-	}
-		     
-	flush_gart(NULL);
-
+	flush_gart();
 	dma_ops = &gart_dma_ops;
 	dma_ops = &gart_dma_ops;
-
-	return 0;
 } 
 } 
 
 
-/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
-
 void gart_parse_options(char *p)
 void gart_parse_options(char *p)
 {
 {
 	int arg;
 	int arg;

+ 6 - 3
arch/x86_64/kernel/pci-nommu.c

@@ -4,6 +4,8 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <linux/string.h>
 #include <linux/string.h>
+#include <linux/dma-mapping.h>
+
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/processor.h>
 #include <asm/processor.h>
 #include <asm/dma.h>
 #include <asm/dma.h>
@@ -12,10 +14,11 @@ static int
 check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size)
 {
 {
         if (hwdev && bus + size > *hwdev->dma_mask) {
         if (hwdev && bus + size > *hwdev->dma_mask) {
-		if (*hwdev->dma_mask >= 0xffffffffULL)
+		if (*hwdev->dma_mask >= DMA_32BIT_MASK)
 			printk(KERN_ERR
 			printk(KERN_ERR
-			    "nommu_%s: overflow %Lx+%lu of device mask %Lx\n",
-	       			name, (long long)bus, size, (long long)*hwdev->dma_mask);
+			    "nommu_%s: overflow %Lx+%zu of device mask %Lx\n",
+				name, (long long)bus, size,
+				(long long)*hwdev->dma_mask);
 		return 0;
 		return 0;
 	}
 	}
 	return 1;
 	return 1;

+ 1 - 1
arch/x86_64/kernel/pci-swiotlb.c

@@ -31,7 +31,7 @@ struct dma_mapping_ops swiotlb_dma_ops = {
 void pci_swiotlb_init(void)
 void pci_swiotlb_init(void)
 {
 {
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
 	/* don't initialize swiotlb if iommu=off (no_iommu=1) */
-	if (!iommu_aperture && !no_iommu &&
+	if (!iommu_detected && !no_iommu &&
 	    (end_pfn > MAX_DMA32_PFN || force_iommu))
 	    (end_pfn > MAX_DMA32_PFN || force_iommu))
 	       swiotlb = 1;
 	       swiotlb = 1;
 	if (swiotlb) {
 	if (swiotlb) {

+ 9 - 7
arch/x86_64/kernel/process.c

@@ -10,7 +10,6 @@
  *	Andi Kleen.
  *	Andi Kleen.
  *
  *
  *	CPU hotplug support - ashok.raj@intel.com
  *	CPU hotplug support - ashok.raj@intel.com
- *  $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
  */
  */
 
 
 /*
 /*
@@ -64,6 +63,7 @@ EXPORT_SYMBOL(boot_option_idle_override);
  * Powermanagement idle function, if any..
  * Powermanagement idle function, if any..
  */
  */
 void (*pm_idle)(void);
 void (*pm_idle)(void);
+EXPORT_SYMBOL(pm_idle);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
 
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -111,7 +111,7 @@ static void default_idle(void)
 {
 {
 	local_irq_enable();
 	local_irq_enable();
 
 
-	clear_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status &= ~TS_POLLING;
 	smp_mb__after_clear_bit();
 	smp_mb__after_clear_bit();
 	while (!need_resched()) {
 	while (!need_resched()) {
 		local_irq_disable();
 		local_irq_disable();
@@ -120,7 +120,7 @@ static void default_idle(void)
 		else
 		else
 			local_irq_enable();
 			local_irq_enable();
 	}
 	}
-	set_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status |= TS_POLLING;
 }
 }
 
 
 /*
 /*
@@ -203,8 +203,7 @@ static inline void play_dead(void)
  */
  */
 void cpu_idle (void)
 void cpu_idle (void)
 {
 {
-	set_thread_flag(TIF_POLLING_NRFLAG);
-
+	current_thread_info()->status |= TS_POLLING;
 	/* endless idle loop with no priority at all */
 	/* endless idle loop with no priority at all */
 	while (1) {
 	while (1) {
 		while (!need_resched()) {
 		while (!need_resched()) {
@@ -335,7 +334,7 @@ void show_regs(struct pt_regs *regs)
 {
 {
 	printk("CPU %d:", smp_processor_id());
 	printk("CPU %d:", smp_processor_id());
 	__show_regs(regs);
 	__show_regs(regs);
-	show_trace(&regs->rsp);
+	show_trace(NULL, regs, (void *)(regs + 1));
 }
 }
 
 
 /*
 /*
@@ -365,8 +364,11 @@ void flush_thread(void)
 	struct task_struct *tsk = current;
 	struct task_struct *tsk = current;
 	struct thread_info *t = current_thread_info();
 	struct thread_info *t = current_thread_info();
 
 
-	if (t->flags & _TIF_ABI_PENDING)
+	if (t->flags & _TIF_ABI_PENDING) {
 		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
 		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
+		if (t->flags & _TIF_IA32)
+			current_thread_info()->status |= TS_COMPAT;
+	}
 
 
 	tsk->thread.debugreg0 = 0;
 	tsk->thread.debugreg0 = 0;
 	tsk->thread.debugreg1 = 0;
 	tsk->thread.debugreg1 = 0;

+ 1 - 0
arch/x86_64/kernel/reboot.c

@@ -20,6 +20,7 @@
  * Power off function, if any
  * Power off function, if any
  */
  */
 void (*pm_power_off)(void);
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 
 static long no_idt[3];
 static long no_idt[3];
 static enum { 
 static enum { 

+ 55 - 125
arch/x86_64/kernel/setup.c

@@ -5,8 +5,6 @@
  *
  *
  *  Nov 2001 Dave Jones <davej@suse.de>
  *  Nov 2001 Dave Jones <davej@suse.de>
  *  Forked from i386 setup code.
  *  Forked from i386 setup code.
- *
- *  $Id$
  */
  */
 
 
 /*
 /*
@@ -65,9 +63,7 @@
 #include <asm/setup.h>
 #include <asm/setup.h>
 #include <asm/mach_apic.h>
 #include <asm/mach_apic.h>
 #include <asm/numa.h>
 #include <asm/numa.h>
-#include <asm/swiotlb.h>
 #include <asm/sections.h>
 #include <asm/sections.h>
-#include <asm/gart-mapping.h>
 #include <asm/dmi.h>
 #include <asm/dmi.h>
 
 
 /*
 /*
@@ -75,6 +71,7 @@
  */
  */
 
 
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
 struct cpuinfo_x86 boot_cpu_data __read_mostly;
+EXPORT_SYMBOL(boot_cpu_data);
 
 
 unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features;
 
 
@@ -103,6 +100,7 @@ char dmi_alloc_data[DMI_MAX_DATA];
  * Setup options
  * Setup options
  */
  */
 struct screen_info screen_info;
 struct screen_info screen_info;
+EXPORT_SYMBOL(screen_info);
 struct sys_desc_table_struct {
 struct sys_desc_table_struct {
 	unsigned short length;
 	unsigned short length;
 	unsigned char table[0];
 	unsigned char table[0];
@@ -474,80 +472,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 } 
 } 
 #endif
 #endif
 
 
-/* Use inline assembly to define this because the nops are defined 
-   as inline assembly strings in the include files and we cannot 
-   get them easily into strings. */
-asm("\t.data\nk8nops: " 
-    K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
-    K8_NOP7 K8_NOP8); 
-    
-extern unsigned char k8nops[];
-static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 
-     NULL,
-     k8nops,
-     k8nops + 1,
-     k8nops + 1 + 2,
-     k8nops + 1 + 2 + 3,
-     k8nops + 1 + 2 + 3 + 4,
-     k8nops + 1 + 2 + 3 + 4 + 5,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6,
-     k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
-}; 
-
-extern char __vsyscall_0;
-
-/* Replace instructions with better alternatives for this CPU type.
-
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that assymetric systems where
-   APs have less capabilities than the boot processor are not handled. 
-   In this case boot with "noreplacement". */ 
-void apply_alternatives(void *start, void *end) 
-{ 
-	struct alt_instr *a; 
-	int diff, i, k;
-	for (a = start; (void *)a < end; a++) { 
-		u8 *instr;
-
-		if (!boot_cpu_has(a->cpuid))
-			continue;
-
-		BUG_ON(a->replacementlen > a->instrlen); 
-		instr = a->instr;
-		/* vsyscall code is not mapped yet. resolve it manually. */
-		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END)
-			instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
-		__inline_memcpy(instr, a->replacement, a->replacementlen);
-		diff = a->instrlen - a->replacementlen; 
-
-		/* Pad the rest with nops */
-		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
-			k = diff;
-			if (k > ASM_NOP_MAX)
-				k = ASM_NOP_MAX;
-			__inline_memcpy(instr + i, k8_nops[k], k);
-		} 
-	}
-} 
-
-static int no_replacement __initdata = 0; 
- 
-void __init alternative_instructions(void)
-{
-	extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-	if (no_replacement) 
-		return;
-	apply_alternatives(__alt_instructions, __alt_instructions_end);
-}
-
-static int __init noreplacement_setup(char *s)
-{ 
-     no_replacement = 1; 
-     return 1;
-} 
-
-__setup("noreplacement", noreplacement_setup); 
-
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 struct edd edd;
 struct edd edd;
 #ifdef CONFIG_EDD_MODULE
 #ifdef CONFIG_EDD_MODULE
@@ -780,10 +704,6 @@ void __init setup_arch(char **cmdline_p)
 
 
 	e820_setup_gap();
 	e820_setup_gap();
 
 
-#ifdef CONFIG_GART_IOMMU
-	iommu_hole_init();
-#endif
-
 #ifdef CONFIG_VT
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
 #if defined(CONFIG_VGA_CONSOLE)
 	conswitchp = &vga_con;
 	conswitchp = &vga_con;
@@ -868,24 +788,32 @@ static int nearby_node(int apicid)
 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 {
 {
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	int cpu = smp_processor_id();
 	unsigned bits;
 	unsigned bits;
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_NUMA
+	int cpu = smp_processor_id();
 	int node = 0;
 	int node = 0;
 	unsigned apicid = hard_smp_processor_id();
 	unsigned apicid = hard_smp_processor_id();
 #endif
 #endif
+	unsigned ecx = cpuid_ecx(0x80000008);
+
+	c->x86_max_cores = (ecx & 0xff) + 1;
 
 
-	bits = 0;
-	while ((1 << bits) < c->x86_max_cores)
-		bits++;
+	/* CPU telling us the core id bits shift? */
+	bits = (ecx >> 12) & 0xF;
+
+	/* Otherwise recompute */
+	if (bits == 0) {
+		while ((1 << bits) < c->x86_max_cores)
+			bits++;
+	}
 
 
 	/* Low order bits define the core id (index of core in socket) */
 	/* Low order bits define the core id (index of core in socket) */
-	cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
+	c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
 	/* Convert the APIC ID into the socket ID */
 	/* Convert the APIC ID into the socket ID */
-	phys_proc_id[cpu] = phys_pkg_id(bits);
+	c->phys_proc_id = phys_pkg_id(bits);
 
 
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_NUMA
-  	node = phys_proc_id[cpu];
+  	node = c->phys_proc_id;
  	if (apicid_to_node[apicid] != NUMA_NO_NODE)
  	if (apicid_to_node[apicid] != NUMA_NO_NODE)
  		node = apicid_to_node[apicid];
  		node = apicid_to_node[apicid];
  	if (!node_online(node)) {
  	if (!node_online(node)) {
@@ -898,7 +826,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
  		   but in the same order as the HT nodeids.
  		   but in the same order as the HT nodeids.
  		   If that doesn't result in a usable node fall back to the
  		   If that doesn't result in a usable node fall back to the
  		   path for the previous case.  */
  		   path for the previous case.  */
- 		int ht_nodeid = apicid - (phys_proc_id[0] << bits);
+ 		int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits);
  		if (ht_nodeid >= 0 &&
  		if (ht_nodeid >= 0 &&
  		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
  		    apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
  			node = apicid_to_node[ht_nodeid];
  			node = apicid_to_node[ht_nodeid];
@@ -908,15 +836,13 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
  	}
  	}
 	numa_set_node(cpu, node);
 	numa_set_node(cpu, node);
 
 
-  	printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n",
-  			cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]);
+	printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
 #endif
 #endif
 #endif
 #endif
 }
 }
 
 
-static int __init init_amd(struct cpuinfo_x86 *c)
+static void __init init_amd(struct cpuinfo_x86 *c)
 {
 {
-	int r;
 	unsigned level;
 	unsigned level;
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
@@ -949,8 +875,8 @@ static int __init init_amd(struct cpuinfo_x86 *c)
 	if (c->x86 >= 6)
 	if (c->x86 >= 6)
 		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
 		set_bit(X86_FEATURE_FXSAVE_LEAK, &c->x86_capability);
 
 
-	r = get_model_name(c);
-	if (!r) { 
+	level = get_model_name(c);
+	if (!level) {
 		switch (c->x86) { 
 		switch (c->x86) { 
 		case 15:
 		case 15:
 			/* Should distinguish Models here, but this is only
 			/* Should distinguish Models here, but this is only
@@ -965,13 +891,12 @@ static int __init init_amd(struct cpuinfo_x86 *c)
 	if (c->x86_power & (1<<8))
 	if (c->x86_power & (1<<8))
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
 
 
-	if (c->extended_cpuid_level >= 0x80000008) {
-		c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
-
+	/* Multi core CPU? */
+	if (c->extended_cpuid_level >= 0x80000008)
 		amd_detect_cmp(c);
 		amd_detect_cmp(c);
-	}
 
 
-	return r;
+	/* Fix cpuid4 emulation for more */
+	num_cache_leaves = 3;
 }
 }
 
 
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -979,13 +904,14 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 	u32 	eax, ebx, ecx, edx;
 	u32 	eax, ebx, ecx, edx;
 	int 	index_msb, core_bits;
 	int 	index_msb, core_bits;
-	int 	cpu = smp_processor_id();
 
 
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 	cpuid(1, &eax, &ebx, &ecx, &edx);
 
 
 
 
-	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
+	if (!cpu_has(c, X86_FEATURE_HT))
 		return;
 		return;
+ 	if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
+		goto out;
 
 
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 	smp_num_siblings = (ebx & 0xff0000) >> 16;
 
 
@@ -1000,10 +926,7 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		}
 		}
 
 
 		index_msb = get_count_order(smp_num_siblings);
 		index_msb = get_count_order(smp_num_siblings);
-		phys_proc_id[cpu] = phys_pkg_id(index_msb);
-
-		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
-		       phys_proc_id[cpu]);
+		c->phys_proc_id = phys_pkg_id(index_msb);
 
 
 		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 		smp_num_siblings = smp_num_siblings / c->x86_max_cores;
 
 
@@ -1011,13 +934,15 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 
 
 		core_bits = get_count_order(c->x86_max_cores);
 		core_bits = get_count_order(c->x86_max_cores);
 
 
-		cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+		c->cpu_core_id = phys_pkg_id(index_msb) &
 					       ((1 << core_bits) - 1);
 					       ((1 << core_bits) - 1);
-
-		if (c->x86_max_cores > 1)
-			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
-			       cpu_core_id[cpu]);
 	}
 	}
+out:
+	if ((c->x86_max_cores * smp_num_siblings) > 1) {
+		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n", c->phys_proc_id);
+		printk(KERN_INFO  "CPU: Processor Core ID: %d\n", c->cpu_core_id);
+	}
+
 #endif
 #endif
 }
 }
 
 
@@ -1026,15 +951,12 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
  */
  */
 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
 static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
 {
 {
-	unsigned int eax;
+	unsigned int eax, t;
 
 
 	if (c->cpuid_level < 4)
 	if (c->cpuid_level < 4)
 		return 1;
 		return 1;
 
 
-	__asm__("cpuid"
-		: "=a" (eax)
-		: "0" (4), "c" (0)
-		: "bx", "dx");
+	cpuid_count(4, 0, &eax, &t, &t, &t);
 
 
 	if (eax & 0x1f)
 	if (eax & 0x1f)
 		return ((eax >> 26) + 1);
 		return ((eax >> 26) + 1);
@@ -1047,16 +969,17 @@ static void srat_detect_node(void)
 #ifdef CONFIG_NUMA
 #ifdef CONFIG_NUMA
 	unsigned node;
 	unsigned node;
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
+	int apicid = hard_smp_processor_id();
 
 
 	/* Don't do the funky fallback heuristics the AMD version employs
 	/* Don't do the funky fallback heuristics the AMD version employs
 	   for now. */
 	   for now. */
-	node = apicid_to_node[hard_smp_processor_id()];
+	node = apicid_to_node[apicid];
 	if (node == NUMA_NO_NODE)
 	if (node == NUMA_NO_NODE)
 		node = first_node(node_online_map);
 		node = first_node(node_online_map);
 	numa_set_node(cpu, node);
 	numa_set_node(cpu, node);
 
 
 	if (acpi_numa > 0)
 	if (acpi_numa > 0)
-		printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
+		printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
 #endif
 #endif
 }
 }
 
 
@@ -1066,6 +989,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 	unsigned n;
 	unsigned n;
 
 
 	init_intel_cacheinfo(c);
 	init_intel_cacheinfo(c);
+	if (c->cpuid_level > 9 ) {
+		unsigned eax = cpuid_eax(10);
+		/* Check for version and the number of counters */
+		if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
+			set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
+	}
+
 	n = c->extended_cpuid_level;
 	n = c->extended_cpuid_level;
 	if (n >= 0x80000008) {
 	if (n >= 0x80000008) {
 		unsigned eax = cpuid_eax(0x80000008);
 		unsigned eax = cpuid_eax(0x80000008);
@@ -1157,7 +1087,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	}
 	}
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
+	c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
 #endif
 #endif
 }
 }
 
 
@@ -1284,7 +1214,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
 		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
-		NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow",
+		NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow",
 
 
 		/* Transmeta-defined */
 		/* Transmeta-defined */
 		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
 		"recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
@@ -1295,7 +1225,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		/* Other (Linux-defined) */
 		/* Other (Linux-defined) */
 		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
 		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
 		"constant_tsc", NULL, NULL,
 		"constant_tsc", NULL, NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		"up", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
 
@@ -1365,9 +1295,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 	if (smp_num_siblings * c->x86_max_cores > 1) {
 	if (smp_num_siblings * c->x86_max_cores > 1) {
 		int cpu = c - cpu_data;
 		int cpu = c - cpu_data;
-		seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
+		seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
 		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
 		seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
-		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
+		seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
 		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 		seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
 	}
 	}
 #endif	
 #endif	
@@ -1441,7 +1371,7 @@ struct seq_operations cpuinfo_op = {
 	.show =	show_cpuinfo,
 	.show =	show_cpuinfo,
 };
 };
 
 
-#ifdef CONFIG_INPUT_PCSPKR
+#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
 #include <linux/platform_device.h>
 #include <linux/platform_device.h>
 static __init int add_pcspkr(void)
 static __init int add_pcspkr(void)
 {
 {

+ 2 - 1
arch/x86_64/kernel/setup64.c

@@ -3,7 +3,6 @@
  * Copyright (C) 1995  Linus Torvalds
  * Copyright (C) 1995  Linus Torvalds
  * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
  * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
  * See setup.c for older changelog.
  * See setup.c for older changelog.
- * $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
  */ 
  */ 
 #include <linux/config.h>
 #include <linux/config.h>
 #include <linux/init.h>
 #include <linux/init.h>
@@ -31,6 +30,7 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 
 struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
 struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
 struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
 
 
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
@@ -38,6 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
 
 
 unsigned long __supported_pte_mask __read_mostly = ~0UL;
 unsigned long __supported_pte_mask __read_mostly = ~0UL;
+EXPORT_SYMBOL(__supported_pte_mask);
 static int do_not_nx __cpuinitdata = 0;
 static int do_not_nx __cpuinitdata = 0;
 
 
 /* noexec=on|off
 /* noexec=on|off

+ 0 - 3
arch/x86_64/kernel/signal.c

@@ -7,8 +7,6 @@
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
  *  2000-2002   x86-64 support by Andi Kleen
  *  2000-2002   x86-64 support by Andi Kleen
- * 
- *  $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
  */
  */
 
 
 #include <linux/sched.h>
 #include <linux/sched.h>
@@ -239,7 +237,6 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 	rsp = regs->rsp - 128;
 	rsp = regs->rsp - 128;
 
 
 	/* This is the X/Open sanctioned signal stack switching.  */
 	/* This is the X/Open sanctioned signal stack switching.  */
-	/* RED-PEN: redzone on that stack? */
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 	if (ka->sa.sa_flags & SA_ONSTACK) {
 		if (sas_ss_flags(rsp) == 0)
 		if (sas_ss_flags(rsp) == 0)
 			rsp = current->sas_ss_sp + current->sas_ss_size;
 			rsp = current->sas_ss_sp + current->sas_ss_size;

+ 7 - 3
arch/x86_64/kernel/smp.c

@@ -224,6 +224,7 @@ void flush_tlb_current_task(void)
 		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 		flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
 	preempt_enable();
 	preempt_enable();
 }
 }
+EXPORT_SYMBOL(flush_tlb_current_task);
 
 
 void flush_tlb_mm (struct mm_struct * mm)
 void flush_tlb_mm (struct mm_struct * mm)
 {
 {
@@ -244,6 +245,7 @@ void flush_tlb_mm (struct mm_struct * mm)
 
 
 	preempt_enable();
 	preempt_enable();
 }
 }
+EXPORT_SYMBOL(flush_tlb_mm);
 
 
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 {
@@ -266,6 +268,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 
 
 	preempt_enable();
 	preempt_enable();
 }
 }
+EXPORT_SYMBOL(flush_tlb_page);
 
 
 static void do_flush_tlb_all(void* info)
 static void do_flush_tlb_all(void* info)
 {
 {
@@ -443,6 +446,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	spin_unlock(&call_lock);
 	spin_unlock(&call_lock);
 	return 0;
 	return 0;
 }
 }
+EXPORT_SYMBOL(smp_call_function);
 
 
 void smp_stop_cpu(void)
 void smp_stop_cpu(void)
 {
 {
@@ -460,7 +464,7 @@ static void smp_really_stop_cpu(void *dummy)
 {
 {
 	smp_stop_cpu(); 
 	smp_stop_cpu(); 
 	for (;;) 
 	for (;;) 
-		asm("hlt"); 
+		halt();
 } 
 } 
 
 
 void smp_send_stop(void)
 void smp_send_stop(void)
@@ -520,13 +524,13 @@ asmlinkage void smp_call_function_interrupt(void)
 
 
 int safe_smp_processor_id(void)
 int safe_smp_processor_id(void)
 {
 {
-	int apicid, i;
+	unsigned apicid, i;
 
 
 	if (disable_apic)
 	if (disable_apic)
 		return 0;
 		return 0;
 
 
 	apicid = hard_smp_processor_id();
 	apicid = hard_smp_processor_id();
-	if (x86_cpu_to_apicid[apicid] == apicid)
+	if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
 		return apicid;
 		return apicid;
 
 
 	for (i = 0; i < NR_CPUS; ++i) {
 	for (i = 0; i < NR_CPUS; ++i) {

+ 14 - 9
arch/x86_64/kernel/smpboot.c

@@ -63,13 +63,11 @@
 
 
 /* Number of siblings per CPU package */
 /* Number of siblings per CPU package */
 int smp_num_siblings = 1;
 int smp_num_siblings = 1;
-/* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-/* core ID of each logical CPU */
-u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(smp_num_siblings);
 
 
 /* Last level cache ID of each logical CPU */
 /* Last level cache ID of each logical CPU */
 u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
 u8 cpu_llc_id[NR_CPUS] __cpuinitdata  = {[0 ... NR_CPUS-1] = BAD_APICID};
+EXPORT_SYMBOL(cpu_llc_id);
 
 
 /* Bitmask of currently online CPUs */
 /* Bitmask of currently online CPUs */
 cpumask_t cpu_online_map __read_mostly;
 cpumask_t cpu_online_map __read_mostly;
@@ -82,18 +80,21 @@ EXPORT_SYMBOL(cpu_online_map);
  */
  */
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callin_map;
 cpumask_t cpu_callout_map;
 cpumask_t cpu_callout_map;
+EXPORT_SYMBOL(cpu_callout_map);
 
 
 cpumask_t cpu_possible_map;
 cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 EXPORT_SYMBOL(cpu_possible_map);
 
 
 /* Per CPU bogomips and other parameters */
 /* Per CPU bogomips and other parameters */
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
 struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+EXPORT_SYMBOL(cpu_data);
 
 
 /* Set when the idlers are all forked */
 /* Set when the idlers are all forked */
 int smp_threads_ready;
 int smp_threads_ready;
 
 
 /* representing HT siblings of each logical CPU */
 /* representing HT siblings of each logical CPU */
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_sibling_map);
 
 
 /* representing HT and core siblings of each logical CPU */
 /* representing HT and core siblings of each logical CPU */
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
@@ -472,8 +473,8 @@ static inline void set_cpu_sibling_map(int cpu)
 
 
 	if (smp_num_siblings > 1) {
 	if (smp_num_siblings > 1) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
 		for_each_cpu_mask(i, cpu_sibling_setup_map) {
-			if (phys_proc_id[cpu] == phys_proc_id[i] &&
-			    cpu_core_id[cpu] == cpu_core_id[i]) {
+			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
+			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(i, cpu_sibling_map[cpu]);
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(cpu, cpu_sibling_map[i]);
 				cpu_set(i, cpu_core_map[cpu]);
 				cpu_set(i, cpu_core_map[cpu]);
@@ -500,7 +501,7 @@ static inline void set_cpu_sibling_map(int cpu)
 			cpu_set(i, c[cpu].llc_shared_map);
 			cpu_set(i, c[cpu].llc_shared_map);
 			cpu_set(cpu, c[i].llc_shared_map);
 			cpu_set(cpu, c[i].llc_shared_map);
 		}
 		}
-		if (phys_proc_id[cpu] == phys_proc_id[i]) {
+		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(i, cpu_core_map[cpu]);
 			cpu_set(cpu, cpu_core_map[i]);
 			cpu_set(cpu, cpu_core_map[i]);
 			/*
 			/*
@@ -797,6 +798,8 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 	}
 	}
 
 
 
 
+	alternatives_smp_switch(1);
+
 	c_idle.idle = get_idle_for_cpu(cpu);
 	c_idle.idle = get_idle_for_cpu(cpu);
 
 
 	if (c_idle.idle) {
 	if (c_idle.idle) {
@@ -1199,8 +1202,8 @@ static void remove_siblinginfo(int cpu)
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
 		cpu_clear(cpu, cpu_sibling_map[sibling]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_sibling_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
 	cpus_clear(cpu_core_map[cpu]);
-	phys_proc_id[cpu] = BAD_APICID;
-	cpu_core_id[cpu] = BAD_APICID;
+	c[cpu].phys_proc_id = 0;
+	c[cpu].cpu_core_id = 0;
 	cpu_clear(cpu, cpu_sibling_setup_map);
 	cpu_clear(cpu, cpu_sibling_setup_map);
 }
 }
 
 
@@ -1259,6 +1262,8 @@ void __cpu_die(unsigned int cpu)
 		/* They ack this in play_dead by setting CPU_DEAD */
 		/* They ack this in play_dead by setting CPU_DEAD */
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 			printk ("CPU %d is now offline\n", cpu);
 			printk ("CPU %d is now offline\n", cpu);
+			if (1 == num_online_cpus())
+				alternatives_smp_switch(0);
 			return;
 			return;
 		}
 		}
 		msleep(100);
 		msleep(100);

+ 202 - 0
arch/x86_64/kernel/tce.c

@@ -0,0 +1,202 @@
+/*
+ * Derived from arch/powerpc/platforms/pseries/iommu.c
+ *
+ * Copyright (C) 2006 Jon Mason <jdmason@us.ibm.com>, IBM Corporation
+ * Copyright (C) 2006 Muli Ben-Yehuda <muli@il.ibm.com>, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/bootmem.h>
+#include <asm/tce.h>
+#include <asm/calgary.h>
+#include <asm/proto.h>
+
+/* flush a tce at 'tceaddr' to main memory */
+static inline void flush_tce(void* tceaddr)
+{
+	/* a single tce can't cross a cache line */
+	if (cpu_has_clflush)
+		asm volatile("clflush (%0)" :: "r" (tceaddr));
+	else
+		asm volatile("wbinvd":::"memory");
+}
+
+void tce_build(struct iommu_table *tbl, unsigned long index,
+	unsigned int npages, unsigned long uaddr, int direction)
+{
+	u64* tp;
+	u64 t;
+	u64 rpn;
+
+	t = (1 << TCE_READ_SHIFT);
+	if (direction != DMA_TO_DEVICE)
+		t |= (1 << TCE_WRITE_SHIFT);
+
+	tp = ((u64*)tbl->it_base) + index;
+
+	while (npages--) {
+		rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
+		t &= ~TCE_RPN_MASK;
+		t |= (rpn << TCE_RPN_SHIFT);
+
+		*tp = cpu_to_be64(t);
+		flush_tce(tp);
+
+		uaddr += PAGE_SIZE;
+		tp++;
+	}
+}
+
+void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
+{
+	u64* tp;
+
+	tp  = ((u64*)tbl->it_base) + index;
+
+	while (npages--) {
+		*tp = cpu_to_be64(0);
+		flush_tce(tp);
+		tp++;
+	}
+}
+
+static inline unsigned int table_size_to_number_of_entries(unsigned char size)
+{
+	/*
+	 * size is the order of the table, 0-7
+	 * smallest table is 8K entries, so shift result by 13 to
+	 * multiply by 8K
+	 */
+	return (1 << size) << 13;
+}
+
+static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
+{
+	unsigned int bitmapsz;
+	unsigned int tce_table_index;
+	unsigned long bmppages;
+	int ret;
+
+	tbl->it_busno = dev->bus->number;
+
+	/* set the tce table size - measured in entries */
+	tbl->it_size = table_size_to_number_of_entries(specified_table_size);
+
+	tce_table_index = bus_to_phb(tbl->it_busno);
+	tbl->it_base = (unsigned long)tce_table_kva[tce_table_index];
+	if (!tbl->it_base) {
+		printk(KERN_ERR "Calgary: iommu_table_setparms: "
+		       "no table allocated?!\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	/*
+	 * number of bytes needed for the bitmap size in number of
+	 * entries; we need one bit per entry
+	 */
+	bitmapsz = tbl->it_size / BITS_PER_BYTE;
+	bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
+	if (!bmppages) {
+		printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	tbl->it_map = (unsigned long*)bmppages;
+
+	memset(tbl->it_map, 0, bitmapsz);
+
+	tbl->it_hint = 0;
+
+	spin_lock_init(&tbl->it_lock);
+
+	return 0;
+
+done:
+	return ret;
+}
+
+int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
+{
+	struct iommu_table *tbl;
+	int ret;
+
+	if (dev->sysdata) {
+		printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
+		       dev, dev->sysdata);
+		BUG();
+	}
+
+	tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
+	if (!tbl) {
+		printk(KERN_ERR "Calgary: error allocating iommu_table\n");
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	ret = tce_table_setparms(dev, tbl);
+	if (ret)
+		goto free_tbl;
+
+	tce_free(tbl, 0, tbl->it_size);
+
+	tbl->bbar = bbar;
+
+	/*
+	 * NUMA is already using the bus's sysdata pointer, so we use
+	 * the bus's pci_dev's sysdata instead.
+	 */
+	dev->sysdata = tbl;
+
+	return 0;
+
+free_tbl:
+	kfree(tbl);
+done:
+	return ret;
+}
+
+void* alloc_tce_table(void)
+{
+	unsigned int size;
+
+	size = table_size_to_number_of_entries(specified_table_size);
+	size *= TCE_ENTRY_SIZE;
+
+	return __alloc_bootmem_low(size, size, 0);
+}
+
+void free_tce_table(void *tbl)
+{
+	unsigned int size;
+
+	if (!tbl)
+		return;
+
+	size = table_size_to_number_of_entries(specified_table_size);
+	size *= TCE_ENTRY_SIZE;
+
+	free_bootmem(__pa(tbl), size);
+}

+ 41 - 46
arch/x86_64/kernel/time.c

@@ -8,7 +8,7 @@
  *  Copyright (c) 1995  Markus Kuhn
  *  Copyright (c) 1995  Markus Kuhn
  *  Copyright (c) 1996  Ingo Molnar
  *  Copyright (c) 1996  Ingo Molnar
  *  Copyright (c) 1998  Andrea Arcangeli
  *  Copyright (c) 1998  Andrea Arcangeli
- *  Copyright (c) 2002  Vojtech Pavlik
+ *  Copyright (c) 2002,2006  Vojtech Pavlik
  *  Copyright (c) 2003  Andi Kleen
  *  Copyright (c) 2003  Andi Kleen
  *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
  *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
  */
  */
@@ -51,14 +51,21 @@ extern int using_apic_timer;
 static char *time_init_gtod(void);
 static char *time_init_gtod(void);
 
 
 DEFINE_SPINLOCK(rtc_lock);
 DEFINE_SPINLOCK(rtc_lock);
+EXPORT_SYMBOL(rtc_lock);
 DEFINE_SPINLOCK(i8253_lock);
 DEFINE_SPINLOCK(i8253_lock);
 
 
 int nohpet __initdata = 0;
 int nohpet __initdata = 0;
 static int notsc __initdata = 0;
 static int notsc __initdata = 0;
 
 
-#undef HPET_HACK_ENABLE_DANGEROUS
+#define USEC_PER_TICK (USEC_PER_SEC / HZ)
+#define NSEC_PER_TICK (NSEC_PER_SEC / HZ)
+#define FSEC_PER_TICK (FSEC_PER_SEC / HZ)
+
+#define NS_SCALE	10 /* 2^10, carefully chosen */
+#define US_SCALE	32 /* 2^32, arbitralrily chosen */
 
 
 unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
 unsigned int cpu_khz;					/* TSC clocks / usec, not used here */
+EXPORT_SYMBOL(cpu_khz);
 static unsigned long hpet_period;			/* fsecs / HPET clock */
 static unsigned long hpet_period;			/* fsecs / HPET clock */
 unsigned long hpet_tick;				/* HPET clocks / interrupt */
 unsigned long hpet_tick;				/* HPET clocks / interrupt */
 int hpet_use_timer;				/* Use counter of hpet for time keeping, otherwise PIT */
 int hpet_use_timer;				/* Use counter of hpet for time keeping, otherwise PIT */
@@ -90,7 +97,7 @@ static inline unsigned int do_gettimeoffset_tsc(void)
 	t = get_cycles_sync();
 	t = get_cycles_sync();
 	if (t < vxtime.last_tsc) 
 	if (t < vxtime.last_tsc) 
 		t = vxtime.last_tsc; /* hack */
 		t = vxtime.last_tsc; /* hack */
-	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32;
+	x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> US_SCALE;
 	return x;
 	return x;
 }
 }
 
 
@@ -98,7 +105,7 @@ static inline unsigned int do_gettimeoffset_hpet(void)
 {
 {
 	/* cap counter read to one tick to avoid inconsistencies */
 	/* cap counter read to one tick to avoid inconsistencies */
 	unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
 	unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last;
-	return (min(counter,hpet_tick) * vxtime.quot) >> 32;
+	return (min(counter,hpet_tick) * vxtime.quot) >> US_SCALE;
 }
 }
 
 
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
 unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
@@ -118,7 +125,7 @@ void do_gettimeofday(struct timeval *tv)
 		seq = read_seqbegin(&xtime_lock);
 		seq = read_seqbegin(&xtime_lock);
 
 
 		sec = xtime.tv_sec;
 		sec = xtime.tv_sec;
-		usec = xtime.tv_nsec / 1000;
+		usec = xtime.tv_nsec / NSEC_PER_USEC;
 
 
 		/* i386 does some correction here to keep the clock 
 		/* i386 does some correction here to keep the clock 
 		   monotonous even when ntpd is fixing drift.
 		   monotonous even when ntpd is fixing drift.
@@ -129,14 +136,14 @@ void do_gettimeofday(struct timeval *tv)
 		   in arch/x86_64/kernel/vsyscall.c and export all needed
 		   in arch/x86_64/kernel/vsyscall.c and export all needed
 		   variables in vmlinux.lds. -AK */ 
 		   variables in vmlinux.lds. -AK */ 
 
 
-		t = (jiffies - wall_jiffies) * (1000000L / HZ) +
+		t = (jiffies - wall_jiffies) * USEC_PER_TICK +
 			do_gettimeoffset();
 			do_gettimeoffset();
 		usec += t;
 		usec += t;
 
 
 	} while (read_seqretry(&xtime_lock, seq));
 	} while (read_seqretry(&xtime_lock, seq));
 
 
-	tv->tv_sec = sec + usec / 1000000;
-	tv->tv_usec = usec % 1000000;
+	tv->tv_sec = sec + usec / USEC_PER_SEC;
+	tv->tv_usec = usec % USEC_PER_SEC;
 }
 }
 
 
 EXPORT_SYMBOL(do_gettimeofday);
 EXPORT_SYMBOL(do_gettimeofday);
@@ -157,8 +164,8 @@ int do_settimeofday(struct timespec *tv)
 
 
 	write_seqlock_irq(&xtime_lock);
 	write_seqlock_irq(&xtime_lock);
 
 
-	nsec -= do_gettimeoffset() * 1000 +
-		(jiffies - wall_jiffies) * (NSEC_PER_SEC/HZ);
+	nsec -= do_gettimeoffset() * NSEC_PER_USEC +
+		(jiffies - wall_jiffies) * NSEC_PER_TICK;
 
 
 	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
 	wtm_sec  = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
 	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
 	wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -288,7 +295,7 @@ unsigned long long monotonic_clock(void)
 			this_offset = hpet_readl(HPET_COUNTER);
 			this_offset = hpet_readl(HPET_COUNTER);
 		} while (read_seqretry(&xtime_lock, seq));
 		} while (read_seqretry(&xtime_lock, seq));
 		offset = (this_offset - last_offset);
 		offset = (this_offset - last_offset);
-		offset *= (NSEC_PER_SEC/HZ) / hpet_tick;
+		offset *= NSEC_PER_TICK / hpet_tick;
 	} else {
 	} else {
 		do {
 		do {
 			seq = read_seqbegin(&xtime_lock);
 			seq = read_seqbegin(&xtime_lock);
@@ -297,7 +304,8 @@ unsigned long long monotonic_clock(void)
 			base = monotonic_base;
 			base = monotonic_base;
 		} while (read_seqretry(&xtime_lock, seq));
 		} while (read_seqretry(&xtime_lock, seq));
 		this_offset = get_cycles_sync();
 		this_offset = get_cycles_sync();
-		offset = (this_offset - last_offset)*1000 / cpu_khz; 
+		/* FIXME: 1000 or 1000000? */
+		offset = (this_offset - last_offset)*1000 / cpu_khz;
 	}
 	}
 	return base + offset;
 	return base + offset;
 }
 }
@@ -382,7 +390,7 @@ void main_timer_handler(struct pt_regs *regs)
 		}
 		}
 
 
 		monotonic_base += 
 		monotonic_base += 
-			(offset - vxtime.last)*(NSEC_PER_SEC/HZ) / hpet_tick;
+			(offset - vxtime.last) * NSEC_PER_TICK / hpet_tick;
 
 
 		vxtime.last = offset;
 		vxtime.last = offset;
 #ifdef CONFIG_X86_PM_TIMER
 #ifdef CONFIG_X86_PM_TIMER
@@ -391,24 +399,25 @@ void main_timer_handler(struct pt_regs *regs)
 #endif
 #endif
 	} else {
 	} else {
 		offset = (((tsc - vxtime.last_tsc) *
 		offset = (((tsc - vxtime.last_tsc) *
-			   vxtime.tsc_quot) >> 32) - (USEC_PER_SEC / HZ);
+			   vxtime.tsc_quot) >> US_SCALE) - USEC_PER_TICK;
 
 
 		if (offset < 0)
 		if (offset < 0)
 			offset = 0;
 			offset = 0;
 
 
-		if (offset > (USEC_PER_SEC / HZ)) {
-			lost = offset / (USEC_PER_SEC / HZ);
-			offset %= (USEC_PER_SEC / HZ);
+		if (offset > USEC_PER_TICK) {
+			lost = offset / USEC_PER_TICK;
+			offset %= USEC_PER_TICK;
 		}
 		}
 
 
-		monotonic_base += (tsc - vxtime.last_tsc)*1000000/cpu_khz ;
+		/* FIXME: 1000 or 1000000? */
+		monotonic_base += (tsc - vxtime.last_tsc) * 1000000 / cpu_khz;
 
 
 		vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
 		vxtime.last_tsc = tsc - vxtime.quot * delay / vxtime.tsc_quot;
 
 
 		if ((((tsc - vxtime.last_tsc) *
 		if ((((tsc - vxtime.last_tsc) *
-		      vxtime.tsc_quot) >> 32) < offset)
+		      vxtime.tsc_quot) >> US_SCALE) < offset)
 			vxtime.last_tsc = tsc -
 			vxtime.last_tsc = tsc -
-				(((long) offset << 32) / vxtime.tsc_quot) - 1;
+				(((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
 	}
 	}
 
 
 	if (lost > 0) {
 	if (lost > 0) {
@@ -468,16 +477,15 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 }
 
 
 static unsigned int cyc2ns_scale __read_mostly;
 static unsigned int cyc2ns_scale __read_mostly;
-#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
 
 static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 {
 {
-	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
+	cyc2ns_scale = (NSEC_PER_MSEC << NS_SCALE) / cpu_khz;
 }
 }
 
 
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
 {
 {
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+	return (cyc * cyc2ns_scale) >> NS_SCALE;
 }
 }
 
 
 unsigned long long sched_clock(void)
 unsigned long long sched_clock(void)
@@ -490,7 +498,7 @@ unsigned long long sched_clock(void)
            Disadvantage is a small drift between CPUs in some configurations,
            Disadvantage is a small drift between CPUs in some configurations,
 	   but that should be tolerable. */
 	   but that should be tolerable. */
 	if (__vxtime.mode == VXTIME_HPET)
 	if (__vxtime.mode == VXTIME_HPET)
-		return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> 32;
+		return (hpet_readl(HPET_COUNTER) * vxtime.quot) >> US_SCALE;
 #endif
 #endif
 
 
 	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
 	/* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
@@ -633,7 +641,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 
 
 		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
 		cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
-			vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+			vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
 	}
 	}
 	
 	
 	set_cyc2ns_scale(cpu_khz_ref);
 	set_cyc2ns_scale(cpu_khz_ref);
@@ -789,8 +797,8 @@ static int hpet_timer_stop_set_go(unsigned long tick)
 	if (hpet_use_timer) {
 	if (hpet_use_timer) {
 		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
 		hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL |
 		    HPET_TN_32BIT, HPET_T0_CFG);
 		    HPET_TN_32BIT, HPET_T0_CFG);
-		hpet_writel(hpet_tick, HPET_T0_CMP);
-		hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* next interrupt */
+		hpet_writel(hpet_tick, HPET_T0_CMP); /* period */
 		cfg |= HPET_CFG_LEGACY;
 		cfg |= HPET_CFG_LEGACY;
 	}
 	}
 /*
 /*
@@ -825,8 +833,7 @@ static int hpet_init(void)
 	if (hpet_period < 100000 || hpet_period > 100000000)
 	if (hpet_period < 100000 || hpet_period > 100000000)
 		return -1;
 		return -1;
 
 
-	hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) /
-		hpet_period;
+	hpet_tick = (FSEC_PER_TICK + hpet_period / 2) / hpet_period;
 
 
 	hpet_use_timer = (id & HPET_ID_LEGSUP);
 	hpet_use_timer = (id & HPET_ID_LEGSUP);
 
 
@@ -890,18 +897,6 @@ void __init time_init(void)
 	char *timename;
 	char *timename;
 	char *gtod;
 	char *gtod;
 
 
-#ifdef HPET_HACK_ENABLE_DANGEROUS
-        if (!vxtime.hpet_address) {
-		printk(KERN_WARNING "time.c: WARNING: Enabling HPET base "
-		       "manually!\n");
-                outl(0x800038a0, 0xcf8);
-                outl(0xff000001, 0xcfc);
-                outl(0x800038a0, 0xcf8);
-                vxtime.hpet_address = inl(0xcfc) & 0xfffffffe;
-		printk(KERN_WARNING "time.c: WARNING: Enabled HPET "
-		       "at %#lx.\n", vxtime.hpet_address);
-        }
-#endif
 	if (nohpet)
 	if (nohpet)
 		vxtime.hpet_address = 0;
 		vxtime.hpet_address = 0;
 
 
@@ -912,7 +907,7 @@ void __init time_init(void)
 	                        -xtime.tv_sec, -xtime.tv_nsec);
 	                        -xtime.tv_sec, -xtime.tv_nsec);
 
 
 	if (!hpet_init())
 	if (!hpet_init())
-                vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period;
+                vxtime_hz = (FSEC_PER_SEC + hpet_period / 2) / hpet_period;
 	else
 	else
 		vxtime.hpet_address = 0;
 		vxtime.hpet_address = 0;
 
 
@@ -941,8 +936,8 @@ void __init time_init(void)
 	       vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
 	       vxtime_hz / 1000000, vxtime_hz % 1000000, timename, gtod);
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
 		cpu_khz / 1000, cpu_khz % 1000);
 		cpu_khz / 1000, cpu_khz % 1000);
-	vxtime.quot = (1000000L << 32) / vxtime_hz;
-	vxtime.tsc_quot = (1000L << 32) / cpu_khz;
+	vxtime.quot = (USEC_PER_SEC << US_SCALE) / vxtime_hz;
+	vxtime.tsc_quot = (USEC_PER_MSEC << US_SCALE) / cpu_khz;
 	vxtime.last_tsc = get_cycles_sync();
 	vxtime.last_tsc = get_cycles_sync();
 	setup_irq(0, &irq0);
 	setup_irq(0, &irq0);
 
 
@@ -956,10 +951,10 @@ void __init time_init(void)
 __cpuinit int unsynchronized_tsc(void)
 __cpuinit int unsynchronized_tsc(void)
 {
 {
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	if (oem_force_hpet_timer())
+	if (apic_is_clustered_box())
 		return 1;
 		return 1;
  	/* Intel systems are normally all synchronized. Exceptions
  	/* Intel systems are normally all synchronized. Exceptions
- 	   are handled in the OEM check above. */
+ 	   are handled in the check above. */
  	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
  	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
  		return 0;
  		return 0;
 #endif
 #endif

+ 72 - 11
arch/x86_64/kernel/traps.c

@@ -6,8 +6,6 @@
  *
  *
  *  Pentium III FXSR, SSE support
  *  Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
  *	Gareth Hughes <gareth@valinux.com>, May 2000
- *
- *  $Id: traps.c,v 1.36 2002/03/24 11:09:10 ak Exp $
  */
  */
 
 
 /*
 /*
@@ -31,6 +29,7 @@
 #include <linux/nmi.h>
 #include <linux/nmi.h>
 #include <linux/kprobes.h>
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
 #include <linux/kexec.h>
+#include <linux/unwind.h>
 
 
 #include <asm/system.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
@@ -41,7 +40,7 @@
 #include <asm/i387.h>
 #include <asm/i387.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
 #include <asm/processor.h>
 #include <asm/processor.h>
-
+#include <asm/unwind.h>
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/pgalloc.h>
 #include <asm/pgalloc.h>
 #include <asm/pda.h>
 #include <asm/pda.h>
@@ -71,6 +70,7 @@ asmlinkage void machine_check(void);
 asmlinkage void spurious_interrupt_bug(void);
 asmlinkage void spurious_interrupt_bug(void);
 
 
 ATOMIC_NOTIFIER_HEAD(die_chain);
 ATOMIC_NOTIFIER_HEAD(die_chain);
+EXPORT_SYMBOL(die_chain);
 
 
 int register_die_notifier(struct notifier_block *nb)
 int register_die_notifier(struct notifier_block *nb)
 {
 {
@@ -107,7 +107,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	preempt_enable_no_resched();
 	preempt_enable_no_resched();
 }
 }
 
 
-static int kstack_depth_to_print = 10;
+static int kstack_depth_to_print = 12;
+static int call_trace = 1;
 
 
 #ifdef CONFIG_KALLSYMS
 #ifdef CONFIG_KALLSYMS
 #include <linux/kallsyms.h> 
 #include <linux/kallsyms.h> 
@@ -191,6 +192,25 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
 	return NULL;
 	return NULL;
 }
 }
 
 
+static int show_trace_unwind(struct unwind_frame_info *info, void *context)
+{
+	int i = 11, n = 0;
+
+	while (unwind(info) == 0 && UNW_PC(info)) {
+		++n;
+		if (i > 50) {
+			printk("\n       ");
+			i = 7;
+		} else
+			i += printk(" ");
+		i += printk_address(UNW_PC(info));
+		if (arch_unw_user_mode(info))
+			break;
+	}
+	printk("\n");
+	return n;
+}
+
 /*
 /*
  * x86-64 can have upto three kernel stacks: 
  * x86-64 can have upto three kernel stacks: 
  * process stack
  * process stack
@@ -198,15 +218,39 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
  */
  */
 
 
-void show_trace(unsigned long *stack)
+void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
 {
 {
 	const unsigned cpu = safe_smp_processor_id();
 	const unsigned cpu = safe_smp_processor_id();
 	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
 	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
-	int i;
+	int i = 11;
 	unsigned used = 0;
 	unsigned used = 0;
 
 
 	printk("\nCall Trace:");
 	printk("\nCall Trace:");
 
 
+	if (!tsk)
+		tsk = current;
+
+	if (call_trace >= 0) {
+		int unw_ret = 0;
+		struct unwind_frame_info info;
+
+		if (regs) {
+			if (unwind_init_frame_info(&info, tsk, regs) == 0)
+				unw_ret = show_trace_unwind(&info, NULL);
+		} else if (tsk == current)
+			unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
+		else {
+			if (unwind_init_blocked(&info, tsk) == 0)
+				unw_ret = show_trace_unwind(&info, NULL);
+		}
+		if (unw_ret > 0) {
+			if (call_trace > 0)
+				return;
+			printk("Legacy call trace:");
+			i = 18;
+		}
+	}
+
 #define HANDLE_STACK(cond) \
 #define HANDLE_STACK(cond) \
 	do while (cond) { \
 	do while (cond) { \
 		unsigned long addr = *stack++; \
 		unsigned long addr = *stack++; \
@@ -229,7 +273,7 @@ void show_trace(unsigned long *stack)
 		} \
 		} \
 	} while (0)
 	} while (0)
 
 
-	for(i = 11; ; ) {
+	for(; ; ) {
 		const char *id;
 		const char *id;
 		unsigned long *estack_end;
 		unsigned long *estack_end;
 		estack_end = in_exception_stack(cpu, (unsigned long)stack,
 		estack_end = in_exception_stack(cpu, (unsigned long)stack,
@@ -264,7 +308,7 @@ void show_trace(unsigned long *stack)
 	printk("\n");
 	printk("\n");
 }
 }
 
 
-void show_stack(struct task_struct *tsk, unsigned long * rsp)
+static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
 {
 {
 	unsigned long *stack;
 	unsigned long *stack;
 	int i;
 	int i;
@@ -298,7 +342,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
 		printk("%016lx ", *stack++);
 		printk("%016lx ", *stack++);
 		touch_nmi_watchdog();
 		touch_nmi_watchdog();
 	}
 	}
-	show_trace((unsigned long *)rsp);
+	show_trace(tsk, regs, rsp);
+}
+
+void show_stack(struct task_struct *tsk, unsigned long * rsp)
+{
+	_show_stack(tsk, NULL, rsp);
 }
 }
 
 
 /*
 /*
@@ -307,7 +356,7 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp)
 void dump_stack(void)
 void dump_stack(void)
 {
 {
 	unsigned long dummy;
 	unsigned long dummy;
-	show_trace(&dummy);
+	show_trace(NULL, NULL, &dummy);
 }
 }
 
 
 EXPORT_SYMBOL(dump_stack);
 EXPORT_SYMBOL(dump_stack);
@@ -334,7 +383,7 @@ void show_registers(struct pt_regs *regs)
 	if (in_kernel) {
 	if (in_kernel) {
 
 
 		printk("Stack: ");
 		printk("Stack: ");
-		show_stack(NULL, (unsigned long*)rsp);
+		_show_stack(NULL, regs, (unsigned long*)rsp);
 
 
 		printk("\nCode: ");
 		printk("\nCode: ");
 		if (regs->rip < PAGE_OFFSET)
 		if (regs->rip < PAGE_OFFSET)
@@ -383,6 +432,7 @@ void out_of_line_bug(void)
 { 
 { 
 	BUG(); 
 	BUG(); 
 } 
 } 
+EXPORT_SYMBOL(out_of_line_bug);
 #endif
 #endif
 
 
 static DEFINE_SPINLOCK(die_lock);
 static DEFINE_SPINLOCK(die_lock);
@@ -1012,3 +1062,14 @@ static int __init kstack_setup(char *s)
 }
 }
 __setup("kstack=", kstack_setup);
 __setup("kstack=", kstack_setup);
 
 
+static int __init call_trace_setup(char *s)
+{
+	if (strcmp(s, "old") == 0)
+		call_trace = -1;
+	else if (strcmp(s, "both") == 0)
+		call_trace = 0;
+	else if (strcmp(s, "new") == 0)
+		call_trace = 1;
+	return 1;
+}
+__setup("call_trace=", call_trace_setup);

+ 29 - 0
arch/x86_64/kernel/vmlinux.lds.S

@@ -45,6 +45,15 @@ SECTIONS
 
 
   RODATA
   RODATA
 
 
+#ifdef CONFIG_STACK_UNWIND
+  . = ALIGN(8);
+  .eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {
+	__start_unwind = .;
+  	*(.eh_frame)
+	__end_unwind = .;
+  }
+#endif
+
 				/* Data */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
 	*(.data)
 	*(.data)
@@ -131,6 +140,26 @@ SECTIONS
 	*(.data.page_aligned)
 	*(.data.page_aligned)
   }
   }
 
 
+  /* might get freed after init */
+  . = ALIGN(4096);
+  __smp_alt_begin = .;
+  __smp_alt_instructions = .;
+  .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
+	*(.smp_altinstructions)
+  }
+  __smp_alt_instructions_end = .;
+  . = ALIGN(8);
+  __smp_locks = .;
+  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+	*(.smp_locks)
+  }
+  __smp_locks_end = .;
+  .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
+	*(.smp_altinstr_replacement)
+  }
+  . = ALIGN(4096);
+  __smp_alt_end = .;
+
   . = ALIGN(4096);		/* Init code and data */
   . = ALIGN(4096);		/* Init code and data */
   __init_begin = .;
   __init_begin = .;
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
   .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {

+ 2 - 2
arch/x86_64/kernel/vsyscall.c

@@ -107,7 +107,7 @@ static __always_inline long time_syscall(long *t)
 
 
 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
 {
 {
-	if (unlikely(!__sysctl_vsyscall))
+	if (!__sysctl_vsyscall)
 		return gettimeofday(tv,tz);
 		return gettimeofday(tv,tz);
 	if (tv)
 	if (tv)
 		do_vgettimeofday(tv);
 		do_vgettimeofday(tv);
@@ -120,7 +120,7 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
  * unlikely */
  * unlikely */
 time_t __vsyscall(1) vtime(time_t *t)
 time_t __vsyscall(1) vtime(time_t *t)
 {
 {
-	if (unlikely(!__sysctl_vsyscall))
+	if (!__sysctl_vsyscall)
 		return time_syscall(t);
 		return time_syscall(t);
 	else if (t)
 	else if (t)
 		*t = __xtime.tv_sec;		
 		*t = __xtime.tv_sec;		

+ 5 - 109
arch/x86_64/kernel/x8664_ksyms.c

@@ -1,66 +1,21 @@
+/* Exports for assembly files.
+   All C exports should go in the respective C files. */
+
 #include <linux/config.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
-#include <linux/user.h>
-#include <linux/sched.h>
-#include <linux/in6.h>
-#include <linux/interrupt.h>
-#include <linux/smp_lock.h>
-#include <linux/pm.h>
-#include <linux/pci.h>
-#include <linux/apm_bios.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/syscalls.h>
-#include <linux/tty.h>
 
 
 #include <asm/semaphore.h>
 #include <asm/semaphore.h>
 #include <asm/processor.h>
 #include <asm/processor.h>
-#include <asm/i387.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
-#include <asm/checksum.h>
-#include <asm/io.h>
-#include <asm/delay.h>
-#include <asm/irq.h>
-#include <asm/mmx.h>
-#include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/nmi.h>
-#include <asm/kdebug.h>
-#include <asm/unistd.h>
-#include <asm/tlbflush.h>
-#include <asm/kdebug.h>
-
-extern spinlock_t rtc_lock;
 
 
-#ifdef CONFIG_SMP
-extern void __write_lock_failed(rwlock_t *rw);
-extern void __read_lock_failed(rwlock_t *rw);
-#endif
-
-/* platform dependent support */
-EXPORT_SYMBOL(boot_cpu_data);
-//EXPORT_SYMBOL(dump_fpu);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(ioremap_nocache);
-EXPORT_SYMBOL(iounmap);
 EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(pm_idle);
-EXPORT_SYMBOL(pm_power_off);
 
 
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed);
 EXPORT_SYMBOL(__down_failed_interruptible);
 EXPORT_SYMBOL(__down_failed_interruptible);
 EXPORT_SYMBOL(__down_failed_trylock);
 EXPORT_SYMBOL(__down_failed_trylock);
 EXPORT_SYMBOL(__up_wakeup);
 EXPORT_SYMBOL(__up_wakeup);
-/* Networking helper routines. */
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-EXPORT_SYMBOL(ip_compute_csum);
-/* Delay loops */
-EXPORT_SYMBOL(__udelay);
-EXPORT_SYMBOL(__ndelay);
-EXPORT_SYMBOL(__delay);
-EXPORT_SYMBOL(__const_udelay);
 
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
 EXPORT_SYMBOL(__get_user_2);
@@ -71,42 +26,20 @@ EXPORT_SYMBOL(__put_user_2);
 EXPORT_SYMBOL(__put_user_4);
 EXPORT_SYMBOL(__put_user_4);
 EXPORT_SYMBOL(__put_user_8);
 EXPORT_SYMBOL(__put_user_8);
 
 
-EXPORT_SYMBOL(strncpy_from_user);
-EXPORT_SYMBOL(__strncpy_from_user);
-EXPORT_SYMBOL(clear_user);
-EXPORT_SYMBOL(__clear_user);
 EXPORT_SYMBOL(copy_user_generic);
 EXPORT_SYMBOL(copy_user_generic);
 EXPORT_SYMBOL(copy_from_user);
 EXPORT_SYMBOL(copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_in_user);
-EXPORT_SYMBOL(strnlen_user);
-
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
 
 
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
 EXPORT_SYMBOL(clear_page);
 
 
-EXPORT_SYMBOL(_cpu_pda);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_data);
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
 EXPORT_SYMBOL(__write_lock_failed);
 EXPORT_SYMBOL(__write_lock_failed);
 EXPORT_SYMBOL(__read_lock_failed);
 EXPORT_SYMBOL(__read_lock_failed);
-
-EXPORT_SYMBOL(smp_call_function);
-EXPORT_SYMBOL(cpu_callout_map);
-#endif
-
-#ifdef CONFIG_VT
-EXPORT_SYMBOL(screen_info);
 #endif
 #endif
 
 
-EXPORT_SYMBOL(rtc_lock);
-
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
 /* Export string functions. We normally rely on gcc builtin for most of these,
 /* Export string functions. We normally rely on gcc builtin for most of these,
    but gcc sometimes decides not to inline them. */    
    but gcc sometimes decides not to inline them. */    
 #undef memcpy
 #undef memcpy
@@ -114,51 +47,14 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback);
 #undef memmove
 #undef memmove
 
 
 extern void * memset(void *,int,__kernel_size_t);
 extern void * memset(void *,int,__kernel_size_t);
-extern size_t strlen(const char *);
-extern void * memmove(void * dest,const void *src,size_t count);
 extern void * memcpy(void *,const void *,__kernel_size_t);
 extern void * memcpy(void *,const void *,__kernel_size_t);
 extern void * __memcpy(void *,const void *,__kernel_size_t);
 extern void * __memcpy(void *,const void *,__kernel_size_t);
 
 
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(__memcpy);
 EXPORT_SYMBOL(__memcpy);
 
 
-#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
-/* prototypes are wrong, these are assembly with custom calling functions */
-extern void rwsem_down_read_failed_thunk(void);
-extern void rwsem_wake_thunk(void);
-extern void rwsem_downgrade_thunk(void);
-extern void rwsem_down_write_failed_thunk(void);
-EXPORT_SYMBOL(rwsem_down_read_failed_thunk);
-EXPORT_SYMBOL(rwsem_wake_thunk);
-EXPORT_SYMBOL(rwsem_downgrade_thunk);
-EXPORT_SYMBOL(rwsem_down_write_failed_thunk);
-#endif
-
 EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(empty_zero_page);
-
-EXPORT_SYMBOL(die_chain);
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(cpu_sibling_map);
-EXPORT_SYMBOL(smp_num_siblings);
-#endif
-
-#ifdef CONFIG_BUG
-EXPORT_SYMBOL(out_of_line_bug);
-#endif
-
 EXPORT_SYMBOL(init_level4_pgt);
 EXPORT_SYMBOL(init_level4_pgt);
-
-extern unsigned long __supported_pte_mask;
-EXPORT_SYMBOL(__supported_pte_mask);
-
-#ifdef CONFIG_SMP
-EXPORT_SYMBOL(flush_tlb_page);
-#endif
-
-EXPORT_SYMBOL(cpu_khz);
-
 EXPORT_SYMBOL(load_gs_index);
 EXPORT_SYMBOL(load_gs_index);
 
 

+ 1 - 0
arch/x86_64/lib/csum-partial.c

@@ -147,4 +147,5 @@ unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
 {
 	return csum_fold(csum_partial(buff,len,0));
 	return csum_fold(csum_partial(buff,len,0));
 }
 }
+EXPORT_SYMBOL(ip_compute_csum);
 
 

+ 1 - 0
arch/x86_64/lib/csum-wrappers.c

@@ -109,6 +109,7 @@ csum_partial_copy_nocheck(const unsigned char *src, unsigned char *dst, int len,
 { 
 { 
 	return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
 	return csum_partial_copy_generic(src,dst,len,sum,NULL,NULL);
 } 
 } 
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
 
 
 unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
 unsigned short csum_ipv6_magic(struct in6_addr *saddr, struct in6_addr *daddr,
 			       __u32 len, unsigned short proto, unsigned int sum) 
 			       __u32 len, unsigned short proto, unsigned int sum) 

+ 5 - 0
arch/x86_64/lib/delay.c

@@ -9,6 +9,7 @@
  */
  */
 
 
 #include <linux/config.h>
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
 #include <asm/delay.h>
 #include <asm/delay.h>
@@ -36,18 +37,22 @@ void __delay(unsigned long loops)
 	}
 	}
 	while((now-bclock) < loops);
 	while((now-bclock) < loops);
 }
 }
+EXPORT_SYMBOL(__delay);
 
 
 inline void __const_udelay(unsigned long xloops)
 inline void __const_udelay(unsigned long xloops)
 {
 {
 	__delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
 	__delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32);
 }
 }
+EXPORT_SYMBOL(__const_udelay);
 
 
 void __udelay(unsigned long usecs)
 void __udelay(unsigned long usecs)
 {
 {
 	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
 	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
 }
 }
+EXPORT_SYMBOL(__udelay);
 
 
 void __ndelay(unsigned long nsecs)
 void __ndelay(unsigned long nsecs)
 {
 {
 	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
 	__const_udelay(nsecs * 0x00005);  /* 2**32 / 1000000000 (rounded up) */
 }
 }
+EXPORT_SYMBOL(__ndelay);

+ 3 - 1
arch/x86_64/lib/memmove.c

@@ -3,12 +3,13 @@
  */
  */
 #define _STRING_C
 #define _STRING_C
 #include <linux/string.h>
 #include <linux/string.h>
+#include <linux/module.h>
 
 
 #undef memmove
 #undef memmove
 void *memmove(void * dest,const void *src,size_t count)
 void *memmove(void * dest,const void *src,size_t count)
 {
 {
 	if (dest < src) { 
 	if (dest < src) { 
-		__inline_memcpy(dest,src,count);
+		return memcpy(dest,src,count);
 	} else {
 	} else {
 		char *p = (char *) dest + count;
 		char *p = (char *) dest + count;
 		char *s = (char *) src + count;
 		char *s = (char *) src + count;
@@ -17,3 +18,4 @@ void *memmove(void * dest,const void *src,size_t count)
 	}
 	}
 	return dest;
 	return dest;
 } 
 } 
+EXPORT_SYMBOL(memmove);

+ 11 - 2
arch/x86_64/lib/usercopy.c

@@ -5,6 +5,7 @@
  * Copyright 1997 Linus Torvalds
  * Copyright 1997 Linus Torvalds
  * Copyright 2002 Andi Kleen <ak@suse.de>
  * Copyright 2002 Andi Kleen <ak@suse.de>
  */
  */
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 
 
 /*
 /*
@@ -47,15 +48,17 @@ __strncpy_from_user(char *dst, const char __user *src, long count)
 	__do_strncpy_from_user(dst, src, count, res);
 	__do_strncpy_from_user(dst, src, count, res);
 	return res;
 	return res;
 }
 }
+EXPORT_SYMBOL(__strncpy_from_user);
 
 
 long
 long
 strncpy_from_user(char *dst, const char __user *src, long count)
 strncpy_from_user(char *dst, const char __user *src, long count)
 {
 {
 	long res = -EFAULT;
 	long res = -EFAULT;
 	if (access_ok(VERIFY_READ, src, 1))
 	if (access_ok(VERIFY_READ, src, 1))
-		__do_strncpy_from_user(dst, src, count, res);
+		return __strncpy_from_user(dst, src, count);
 	return res;
 	return res;
 }
 }
+EXPORT_SYMBOL(strncpy_from_user);
 
 
 /*
 /*
  * Zero Userspace
  * Zero Userspace
@@ -94,7 +97,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
 		  [zero] "r" (0UL), [eight] "r" (8UL));
 		  [zero] "r" (0UL), [eight] "r" (8UL));
 	return size;
 	return size;
 }
 }
-
+EXPORT_SYMBOL(__clear_user);
 
 
 unsigned long clear_user(void __user *to, unsigned long n)
 unsigned long clear_user(void __user *to, unsigned long n)
 {
 {
@@ -102,6 +105,7 @@ unsigned long clear_user(void __user *to, unsigned long n)
 		return __clear_user(to, n);
 		return __clear_user(to, n);
 	return n;
 	return n;
 }
 }
+EXPORT_SYMBOL(clear_user);
 
 
 /*
 /*
  * Return the size of a string (including the ending 0)
  * Return the size of a string (including the ending 0)
@@ -125,6 +129,7 @@ long __strnlen_user(const char __user *s, long n)
 		s++;
 		s++;
 	}
 	}
 }
 }
+EXPORT_SYMBOL(__strnlen_user);
 
 
 long strnlen_user(const char __user *s, long n)
 long strnlen_user(const char __user *s, long n)
 {
 {
@@ -132,6 +137,7 @@ long strnlen_user(const char __user *s, long n)
 		return 0;
 		return 0;
 	return __strnlen_user(s, n);
 	return __strnlen_user(s, n);
 }
 }
+EXPORT_SYMBOL(strnlen_user);
 
 
 long strlen_user(const char __user *s)
 long strlen_user(const char __user *s)
 {
 {
@@ -147,6 +153,7 @@ long strlen_user(const char __user *s)
 		s++;
 		s++;
 	}
 	}
 }
 }
+EXPORT_SYMBOL(strlen_user);
 
 
 unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
 unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len)
 {
 {
@@ -155,3 +162,5 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le
 	} 
 	} 
 	return len;		
 	return len;		
 }
 }
+EXPORT_SYMBOL(copy_in_user);
+

+ 5 - 3
arch/x86_64/mm/fault.c

@@ -195,7 +195,7 @@ void dump_pagetable(unsigned long address)
 	printk("PGD %lx ", pgd_val(*pgd));
 	printk("PGD %lx ", pgd_val(*pgd));
 	if (!pgd_present(*pgd)) goto ret; 
 	if (!pgd_present(*pgd)) goto ret; 
 
 
-	pud = __pud_offset_k((pud_t *)pgd_page(*pgd), address);
+	pud = pud_offset(pgd, address);
 	if (bad_address(pud)) goto bad;
 	if (bad_address(pud)) goto bad;
 	printk("PUD %lx ", pud_val(*pud));
 	printk("PUD %lx ", pud_val(*pud));
 	if (!pud_present(*pud))	goto ret;
 	if (!pud_present(*pud))	goto ret;
@@ -445,8 +445,10 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		goto bad_area;
 		goto bad_area;
 	if (error_code & 4) {
 	if (error_code & 4) {
-		// XXX: align red zone size with ABI 
-		if (address + 128 < regs->rsp)
+		/* Allow userspace just enough access below the stack pointer
+		 * to let the 'enter' instruction work.
+		 */
+		if (address + 65536 + 32 * sizeof(unsigned long) < regs->rsp)
 			goto bad_area;
 			goto bad_area;
 	}
 	}
 	if (expand_stack(vma, address))
 	if (expand_stack(vma, address))

+ 22 - 26
arch/x86_64/mm/init.c

@@ -41,8 +41,6 @@
 #include <asm/proto.h>
 #include <asm/proto.h>
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/sections.h>
 #include <asm/sections.h>
-#include <asm/dma-mapping.h>
-#include <asm/swiotlb.h>
 
 
 #ifndef Dprintk
 #ifndef Dprintk
 #define Dprintk(x...)
 #define Dprintk(x...)
@@ -90,8 +88,6 @@ void show_mem(void)
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 	printk(KERN_INFO "%lu pages swap cached\n",cached);
 }
 }
 
 
-/* References to section boundaries */
-
 int after_bootmem;
 int after_bootmem;
 
 
 static __init void *spp_getpage(void)
 static __init void *spp_getpage(void)
@@ -261,9 +257,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
 	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
 	for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
 		unsigned long entry;
 		unsigned long entry;
 
 
-		if (address > end) {
-			for (; i < PTRS_PER_PMD; i++, pmd++)
-				set_pmd(pmd, __pmd(0));
+		if (address >= end) {
+			if (!after_bootmem)
+				for (; i < PTRS_PER_PMD; i++, pmd++)
+					set_pmd(pmd, __pmd(0));
 			break;
 			break;
 		}
 		}
 		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
 		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
@@ -341,7 +338,8 @@ static void __init find_early_table_space(unsigned long end)
 	table_end = table_start;
 	table_end = table_start;
 
 
 	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
 	early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
-		end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
+		end, table_start << PAGE_SHIFT,
+		(table_start << PAGE_SHIFT) + tables);
 }
 }
 
 
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
@@ -372,7 +370,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
 		pud_t *pud;
 		pud_t *pud;
 
 
 		if (after_bootmem)
 		if (after_bootmem)
-			pud = pud_offset_k(pgd, start & PGDIR_MASK);
+			pud = pud_offset(pgd, start & PGDIR_MASK);
 		else
 		else
 			pud = alloc_low_page(&map, &pud_phys);
 			pud = alloc_low_page(&map, &pud_phys);
 
 
@@ -587,10 +585,7 @@ void __init mem_init(void)
 {
 {
 	long codesize, reservedpages, datasize, initsize;
 	long codesize, reservedpages, datasize, initsize;
 
 
-#ifdef CONFIG_SWIOTLB
-	pci_swiotlb_init();
-#endif
-	no_iommu_init();
+	pci_iommu_alloc();
 
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* How many end-of-memory variables you have, grandma! */
 	max_low_pfn = end_pfn;
 	max_low_pfn = end_pfn;
@@ -644,20 +639,29 @@ void __init mem_init(void)
 #endif
 #endif
 }
 }
 
 
-void free_initmem(void)
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
 {
 {
 	unsigned long addr;
 	unsigned long addr;
 
 
-	addr = (unsigned long)(&__init_begin);
-	for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+	if (begin >= end)
+		return;
+
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
 		ClearPageReserved(virt_to_page(addr));
 		ClearPageReserved(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
 		init_page_count(virt_to_page(addr));
 		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 
 		memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); 
 		free_page(addr);
 		free_page(addr);
 		totalram_pages++;
 		totalram_pages++;
 	}
 	}
+}
+
+void free_initmem(void)
+{
 	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
 	memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
-	printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
+	free_init_pages("unused kernel memory",
+			(unsigned long)(&__init_begin),
+			(unsigned long)(&__init_end));
 }
 }
 
 
 #ifdef CONFIG_DEBUG_RODATA
 #ifdef CONFIG_DEBUG_RODATA
@@ -686,15 +690,7 @@ void mark_rodata_ro(void)
 #ifdef CONFIG_BLK_DEV_INITRD
 #ifdef CONFIG_BLK_DEV_INITRD
 void free_initrd_mem(unsigned long start, unsigned long end)
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
 {
-	if (start >= end)
-		return;
-	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-	for (; start < end; start += PAGE_SIZE) {
-		ClearPageReserved(virt_to_page(start));
-		init_page_count(virt_to_page(start));
-		free_page(start);
-		totalram_pages++;
-	}
+	free_init_pages("initrd memory", start, end);
 }
 }
 #endif
 #endif
 
 

+ 5 - 0
arch/x86_64/mm/ioremap.c

@@ -11,6 +11,7 @@
 #include <linux/vmalloc.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/slab.h>
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/pgalloc.h>
 #include <asm/pgalloc.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>
@@ -219,6 +220,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
 	}
 	}
 	return (__force void __iomem *) (offset + (char *)addr);
 	return (__force void __iomem *) (offset + (char *)addr);
 }
 }
+EXPORT_SYMBOL(__ioremap);
 
 
 /**
 /**
  * ioremap_nocache     -   map bus memory into CPU space
  * ioremap_nocache     -   map bus memory into CPU space
@@ -246,6 +248,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
 {
 {
 	return __ioremap(phys_addr, size, _PAGE_PCD);
 	return __ioremap(phys_addr, size, _PAGE_PCD);
 }
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 
 /**
 /**
  * iounmap - Free a IO remapping
  * iounmap - Free a IO remapping
@@ -291,3 +294,5 @@ void iounmap(volatile void __iomem *addr)
 	BUG_ON(p != o || o == NULL);
 	BUG_ON(p != o || o == NULL);
 	kfree(p); 
 	kfree(p); 
 }
 }
+EXPORT_SYMBOL(iounmap);
+

+ 5 - 5
arch/x86_64/pci/k8-bus.c

@@ -2,6 +2,7 @@
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
+#include <asm/k8.h>
 
 
 /*
 /*
  * This discovers the pcibus <-> node mapping on AMD K8.
  * This discovers the pcibus <-> node mapping on AMD K8.
@@ -18,7 +19,6 @@
 #define NR_LDT_BUS_NUMBER_REGISTERS 3
 #define NR_LDT_BUS_NUMBER_REGISTERS 3
 #define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
 #define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF)
 #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
 #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF)
-#define PCI_DEVICE_ID_K8HTCONFIG 0x1100
 
 
 /**
 /**
  * fill_mp_bus_to_cpumask()
  * fill_mp_bus_to_cpumask()
@@ -28,8 +28,7 @@
 __init static int
 __init static int
 fill_mp_bus_to_cpumask(void)
 fill_mp_bus_to_cpumask(void)
 {
 {
-	struct pci_dev *nb_dev = NULL;
-	int i, j;
+	int i, j, k;
 	u32 ldtbus, nid;
 	u32 ldtbus, nid;
 	static int lbnr[3] = {
 	static int lbnr[3] = {
 		LDT_BUS_NUMBER_REGISTER_0,
 		LDT_BUS_NUMBER_REGISTER_0,
@@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void)
 		LDT_BUS_NUMBER_REGISTER_2
 		LDT_BUS_NUMBER_REGISTER_2
 	};
 	};
 
 
-	while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD,
-			PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) {
+	cache_k8_northbridges();
+	for (k = 0; k < num_k8_northbridges; k++) {
+		struct pci_dev *nb_dev = k8_northbridges[k];
 		pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
 		pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid);
 
 
 		for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {
 		for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) {

+ 6 - 6
drivers/acpi/processor_idle.c

@@ -206,11 +206,11 @@ acpi_processor_power_activate(struct acpi_processor *pr,
 
 
 static void acpi_safe_halt(void)
 static void acpi_safe_halt(void)
 {
 {
-	clear_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status &= ~TS_POLLING;
 	smp_mb__after_clear_bit();
 	smp_mb__after_clear_bit();
 	if (!need_resched())
 	if (!need_resched())
 		safe_halt();
 		safe_halt();
-	set_thread_flag(TIF_POLLING_NRFLAG);
+	current_thread_info()->status |= TS_POLLING;
 }
 }
 
 
 static atomic_t c3_cpu_count;
 static atomic_t c3_cpu_count;
@@ -330,10 +330,10 @@ static void acpi_processor_idle(void)
 	 * Invoke the current Cx state to put the processor to sleep.
 	 * Invoke the current Cx state to put the processor to sleep.
 	 */
 	 */
 	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
 	if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) {
-		clear_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status &= ~TS_POLLING;
 		smp_mb__after_clear_bit();
 		smp_mb__after_clear_bit();
 		if (need_resched()) {
 		if (need_resched()) {
-			set_thread_flag(TIF_POLLING_NRFLAG);
+			current_thread_info()->status |= TS_POLLING;
 			local_irq_enable();
 			local_irq_enable();
 			return;
 			return;
 		}
 		}
@@ -376,7 +376,7 @@ static void acpi_processor_idle(void)
 #endif
 #endif
 		/* Re-enable interrupts */
 		/* Re-enable interrupts */
 		local_irq_enable();
 		local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 		/* Compute time (ticks) that we were actually asleep */
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks =
 		sleep_ticks =
 		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
 		    ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
@@ -420,7 +420,7 @@ static void acpi_processor_idle(void)
 #endif
 #endif
 		/* Re-enable interrupts */
 		/* Re-enable interrupts */
 		local_irq_enable();
 		local_irq_enable();
-		set_thread_flag(TIF_POLLING_NRFLAG);
+		current_thread_info()->status |= TS_POLLING;
 		/* Compute time (ticks) that we were actually asleep */
 		/* Compute time (ticks) that we were actually asleep */
 		sleep_ticks =
 		sleep_ticks =
 		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;
 		    ticks_elapsed(t1, t2) - cx->latency_ticks - C3_OVERHEAD;

+ 2 - 2
drivers/char/agp/Kconfig

@@ -55,9 +55,9 @@ config AGP_AMD
 	  X on AMD Irongate, 761, and 762 chipsets.
 	  X on AMD Irongate, 761, and 762 chipsets.
 
 
 config AGP_AMD64
 config AGP_AMD64
-	tristate "AMD Opteron/Athlon64 on-CPU GART support" if !GART_IOMMU
+	tristate "AMD Opteron/Athlon64 on-CPU GART support" if !IOMMU
 	depends on AGP && X86
 	depends on AGP && X86
-	default y if GART_IOMMU
+	default y if IOMMU
 	help
 	help
 	  This option gives you AGP support for the GLX component of
 	  This option gives you AGP support for the GLX component of
 	  X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.
 	  X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs.

+ 27 - 54
drivers/char/agp/amd64-agp.c

@@ -15,11 +15,9 @@
 #include <linux/agp_backend.h>
 #include <linux/agp_backend.h>
 #include <linux/mmzone.h>
 #include <linux/mmzone.h>
 #include <asm/page.h>		/* PAGE_SIZE */
 #include <asm/page.h>		/* PAGE_SIZE */
+#include <asm/k8.h>
 #include "agp.h"
 #include "agp.h"
 
 
-/* Will need to be increased if AMD64 ever goes >8-way. */
-#define MAX_HAMMER_GARTS   8
-
 /* PTE bits. */
 /* PTE bits. */
 #define GPTE_VALID	1
 #define GPTE_VALID	1
 #define GPTE_COHERENT	2
 #define GPTE_COHERENT	2
@@ -53,28 +51,12 @@
 #define ULI_X86_64_HTT_FEA_REG		0x50
 #define ULI_X86_64_HTT_FEA_REG		0x50
 #define ULI_X86_64_ENU_SCR_REG		0x54
 #define ULI_X86_64_ENU_SCR_REG		0x54
 
 
-static int nr_garts;
-static struct pci_dev * hammers[MAX_HAMMER_GARTS];
-
 static struct resource *aperture_resource;
 static struct resource *aperture_resource;
 static int __initdata agp_try_unsupported = 1;
 static int __initdata agp_try_unsupported = 1;
 
 
-#define for_each_nb() for(gart_iterator=0;gart_iterator<nr_garts;gart_iterator++)
-
-static void flush_amd64_tlb(struct pci_dev *dev)
-{
-	u32 tmp;
-
-	pci_read_config_dword (dev, AMD64_GARTCACHECTL, &tmp);
-	tmp |= INVGART;
-	pci_write_config_dword (dev, AMD64_GARTCACHECTL, tmp);
-}
-
 static void amd64_tlbflush(struct agp_memory *temp)
 static void amd64_tlbflush(struct agp_memory *temp)
 {
 {
-	int gart_iterator;
-	for_each_nb()
-		flush_amd64_tlb(hammers[gart_iterator]);
+	k8_flush_garts();
 }
 }
 
 
 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
 static int amd64_insert_memory(struct agp_memory *mem, off_t pg_start, int type)
@@ -153,7 +135,7 @@ static int amd64_fetch_size(void)
 	u32 temp;
 	u32 temp;
 	struct aper_size_info_32 *values;
 	struct aper_size_info_32 *values;
 
 
-	dev = hammers[0];
+	dev = k8_northbridges[0];
 	if (dev==NULL)
 	if (dev==NULL)
 		return 0;
 		return 0;
 
 
@@ -201,9 +183,6 @@ static u64 amd64_configure (struct pci_dev *hammer, u64 gatt_table)
 	tmp &= ~(DISGARTCPU | DISGARTIO);
 	tmp &= ~(DISGARTCPU | DISGARTIO);
 	pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
 	pci_write_config_dword(hammer, AMD64_GARTAPERTURECTL, tmp);
 
 
-	/* keep CPU's coherent. */
-	flush_amd64_tlb (hammer);
-
 	return aper_base;
 	return aper_base;
 }
 }
 
 
@@ -222,13 +201,14 @@ static struct aper_size_info_32 amd_8151_sizes[7] =
 static int amd_8151_configure(void)
 static int amd_8151_configure(void)
 {
 {
 	unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
 	unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
-	int gart_iterator;
+	int i;
 
 
 	/* Configure AGP regs in each x86-64 host bridge. */
 	/* Configure AGP regs in each x86-64 host bridge. */
-	for_each_nb() {
+        for (i = 0; i < num_k8_northbridges; i++) {
 		agp_bridge->gart_bus_addr =
 		agp_bridge->gart_bus_addr =
-				amd64_configure(hammers[gart_iterator],gatt_bus);
+				amd64_configure(k8_northbridges[i], gatt_bus);
 	}
 	}
+	k8_flush_garts();
 	return 0;
 	return 0;
 }
 }
 
 
@@ -236,12 +216,13 @@ static int amd_8151_configure(void)
 static void amd64_cleanup(void)
 static void amd64_cleanup(void)
 {
 {
 	u32 tmp;
 	u32 tmp;
-	int gart_iterator;
-	for_each_nb() {
+	int i;
+        for (i = 0; i < num_k8_northbridges; i++) {
+		struct pci_dev *dev = k8_northbridges[i];
 		/* disable gart translation */
 		/* disable gart translation */
-		pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp);
+		pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp);
 		tmp &= ~AMD64_GARTEN;
 		tmp &= ~AMD64_GARTEN;
-		pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp);
+		pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp);
 	}
 	}
 }
 }
 
 
@@ -311,7 +292,7 @@ static int __devinit aperture_valid(u64 aper, u32 size)
 /*
 /*
  * W*s centric BIOS sometimes only set up the aperture in the AGP
  * W*s centric BIOS sometimes only set up the aperture in the AGP
  * bridge, not the northbridge. On AMD64 this is handled early
  * bridge, not the northbridge. On AMD64 this is handled early
- * in aperture.c, but when GART_IOMMU is not enabled or we run
+ * in aperture.c, but when IOMMU is not enabled or we run
  * on a 32bit kernel this needs to be redone.
  * on a 32bit kernel this needs to be redone.
  * Unfortunately it is impossible to fix the aperture here because it's too late
  * Unfortunately it is impossible to fix the aperture here because it's too late
  * to allocate that much memory. But at least error out cleanly instead of
  * to allocate that much memory. But at least error out cleanly instead of
@@ -361,17 +342,15 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp,
 
 
 static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
 static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
 {
 {
-	struct pci_dev *loop_dev = NULL;
-	int i = 0;
-
-	/* cache pci_devs of northbridges. */
-	while ((loop_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev))
-			!= NULL) {
-		if (i == MAX_HAMMER_GARTS) {
-			printk(KERN_ERR PFX "Too many northbridges for AGP\n");
-			return -1;
-		}
-		if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) {
+	int i;
+
+	if (cache_k8_northbridges() < 0)
+		return -ENODEV;
+
+	i = 0;
+	for (i = 0; i < num_k8_northbridges; i++) {
+		struct pci_dev *dev = k8_northbridges[i];
+		if (fix_northbridge(dev, pdev, cap_ptr) < 0) {
 			printk(KERN_ERR PFX "No usable aperture found.\n");
 			printk(KERN_ERR PFX "No usable aperture found.\n");
 #ifdef __x86_64__
 #ifdef __x86_64__
 			/* should port this to i386 */
 			/* should port this to i386 */
@@ -379,10 +358,8 @@ static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr)
 #endif
 #endif
 			return -1;
 			return -1;
 		}
 		}
-		hammers[i++] = loop_dev;
 	}
 	}
-		nr_garts = i;
-	return i == 0 ? -1 : 0;
+	return 0;
 }
 }
 
 
 /* Handle AMD 8151 quirks */
 /* Handle AMD 8151 quirks */
@@ -450,7 +427,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev)
 	}
 	}
 
 
 	/* shadow x86-64 registers into ULi registers */
 	/* shadow x86-64 registers into ULi registers */
-	pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &httfea);
+	pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea);
 
 
 	/* if x86-64 aperture base is beyond 4G, exit here */
 	/* if x86-64 aperture base is beyond 4G, exit here */
 	if ((httfea & 0x7fff) >> (32 - 25))
 	if ((httfea & 0x7fff) >> (32 - 25))
@@ -513,7 +490,7 @@ static int __devinit nforce3_agp_init(struct pci_dev *pdev)
 	pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
 	pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp);
 
 
 	/* shadow x86-64 registers into NVIDIA registers */
 	/* shadow x86-64 registers into NVIDIA registers */
-	pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase);
+	pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase);
 
 
 	/* if x86-64 aperture base is beyond 4G, exit here */
 	/* if x86-64 aperture base is beyond 4G, exit here */
 	if ( (apbase & 0x7fff) >> (32 - 25) ) {
 	if ( (apbase & 0x7fff) >> (32 - 25) ) {
@@ -754,10 +731,6 @@ static struct pci_driver agp_amd64_pci_driver = {
 int __init agp_amd64_init(void)
 int __init agp_amd64_init(void)
 {
 {
 	int err = 0;
 	int err = 0;
-	static struct pci_device_id amd64nb[] = {
-		{ PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-		{ },
-	};
 
 
 	if (agp_off)
 	if (agp_off)
 		return -EINVAL;
 		return -EINVAL;
@@ -774,7 +747,7 @@ int __init agp_amd64_init(void)
 		}
 		}
 
 
 		/* First check that we have at least one AMD64 NB */
 		/* First check that we have at least one AMD64 NB */
-		if (!pci_dev_present(amd64nb))
+		if (!pci_dev_present(k8_nb_ids))
 			return -ENODEV;
 			return -ENODEV;
 
 
 		/* Look for any AGP bridge */
 		/* Look for any AGP bridge */
@@ -802,7 +775,7 @@ static void __exit agp_amd64_cleanup(void)
 
 
 /* On AMD64 the PCI driver needs to initialize this driver early
 /* On AMD64 the PCI driver needs to initialize this driver early
    for the IOMMU, so it has to be called via a backdoor. */
    for the IOMMU, so it has to be called via a backdoor. */
-#ifndef CONFIG_GART_IOMMU
+#ifndef CONFIG_IOMMU
 module_init(agp_amd64_init);
 module_init(agp_amd64_init);
 module_exit(agp_amd64_cleanup);
 module_exit(agp_amd64_cleanup);
 #endif
 #endif

+ 1 - 0
drivers/pci/msi-apic.c

@@ -4,6 +4,7 @@
 
 
 #include <linux/pci.h>
 #include <linux/pci.h>
 #include <linux/irq.h>
 #include <linux/irq.h>
+#include <asm/smp.h>
 
 
 #include "msi.h"
 #include "msi.h"
 
 

+ 4 - 1
drivers/scsi/aacraid/comminit.c

@@ -104,8 +104,11 @@ static int aac_alloc_comm(struct aac_dev *dev, void **commaddr, unsigned long co
 	 * always true on real computers. It also has some slight problems
 	 * always true on real computers. It also has some slight problems
 	 * with the GART on x86-64. I've btw never tried DMA from PCI space
 	 * with the GART on x86-64. I've btw never tried DMA from PCI space
 	 * on this platform but don't be surprised if its problematic.
 	 * on this platform but don't be surprised if its problematic.
+	 * [AK: something is very very wrong when a driver tests this symbol.
+ 	 *  Someone should figure out what the comment writer really meant here and fix
+	 *  the code. Or just remove that bad code. ]
 	 */
 	 */
-#ifndef CONFIG_GART_IOMMU
+#ifndef CONFIG_IOMMU
 	if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) {
 	if ((num_physpages << (PAGE_SHIFT - 12)) <= AAC_MAX_HOSTPHYSMEMPAGES) {
 		init->HostPhysMemPages = 
 		init->HostPhysMemPages = 
 			cpu_to_le32(num_physpages << (PAGE_SHIFT-12));
 			cpu_to_le32(num_physpages << (PAGE_SHIFT-12));

+ 15 - 1
fs/compat.c

@@ -55,6 +55,20 @@
 
 
 extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
 extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
 
 
+int compat_log = 1;
+
+int compat_printk(const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+	if (!compat_log)
+		return 0;
+	va_start(ap, fmt);
+	ret = vprintk(fmt, ap);
+	va_end(ap);
+	return ret;
+}
+
 /*
 /*
  * Not all architectures have sys_utime, so implement this in terms
  * Not all architectures have sys_utime, so implement this in terms
  * of sys_utimes.
  * of sys_utimes.
@@ -359,7 +373,7 @@ static void compat_ioctl_error(struct file *filp, unsigned int fd,
 	sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
 	sprintf(buf,"'%c'", (cmd>>24) & 0x3f);
 	if (!isprint(buf[1]))
 	if (!isprint(buf[1]))
 		sprintf(buf, "%02x", buf[1]);
 		sprintf(buf, "%02x", buf[1]);
-	printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
+	compat_printk("ioctl32(%s:%d): Unknown cmd fd(%d) "
 			"cmd(%08x){%s} arg(%08x) on %s\n",
 			"cmd(%08x){%s} arg(%08x) on %s\n",
 			current->comm, current->pid,
 			current->comm, current->pid,
 			(int)fd, (unsigned int)cmd, buf,
 			(int)fd, (unsigned int)cmd, buf,

+ 2 - 0
include/asm-i386/alternative.h

@@ -5,6 +5,8 @@
 
 
 #include <asm/types.h>
 #include <asm/types.h>
 
 
+#include <linux/types.h>
+
 struct alt_instr {
 struct alt_instr {
 	u8 *instr; 		/* original instruction */
 	u8 *instr; 		/* original instruction */
 	u8 *replacement;
 	u8 *replacement;

+ 0 - 12
include/asm-i386/apic.h

@@ -111,24 +111,12 @@ extern void init_apic_mappings (void);
 extern void smp_local_timer_interrupt (struct pt_regs * regs);
 extern void smp_local_timer_interrupt (struct pt_regs * regs);
 extern void setup_boot_APIC_clock (void);
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
-extern void setup_apic_nmi_watchdog (void);
-extern int reserve_lapic_nmi(void);
-extern void release_lapic_nmi(void);
-extern void disable_timer_nmi_watchdog(void);
-extern void enable_timer_nmi_watchdog(void);
-extern void nmi_watchdog_tick (struct pt_regs * regs);
 extern int APIC_init_uniprocessor (void);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
 extern void enable_APIC_timer(void);
 
 
 extern void enable_NMI_through_LVT0 (void * dummy);
 extern void enable_NMI_through_LVT0 (void * dummy);
 
 
-extern unsigned int nmi_watchdog;
-#define NMI_NONE	0
-#define NMI_IO_APIC	1
-#define NMI_LOCAL_APIC	2
-#define NMI_INVALID	3
-
 extern int disable_timer_pin_1;
 extern int disable_timer_pin_1;
 
 
 void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
 void smp_send_timer_broadcast_ipi(struct pt_regs *regs);

+ 1 - 0
include/asm-i386/cpufeature.h

@@ -72,6 +72,7 @@
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
 #define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
 #define X86_FEATURE_UP		(3*32+ 9) /* smp kernel running on up */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* Streaming SIMD Extensions-3 */

+ 54 - 0
include/asm-i386/dwarf2.h

@@ -0,0 +1,54 @@
+#ifndef _DWARF2_H
+#define _DWARF2_H
+
+#include <linux/config.h>
+
+#ifndef __ASSEMBLY__
+#warning "asm/dwarf2.h should be only included in pure assembly files"
+#endif
+
+/*
+   Macros for dwarf2 CFI unwind table entries.
+   See "as.info" for details on these pseudo ops. Unfortunately
+   they are only supported in very new binutils, so define them
+   away for older version.
+ */
+
+#ifdef CONFIG_UNWIND_INFO
+
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_REMEMBER_STATE .cfi_remember_state
+#define CFI_RESTORE_STATE .cfi_restore_state
+
+#else
+
+/* Due to the structure of pre-exisiting code, don't use assembler line
+   comment character # to ignore the arguments. Instead, use a dummy macro. */
+.macro ignore a=0, b=0, c=0, d=0
+.endm
+
+#define CFI_STARTPROC	ignore
+#define CFI_ENDPROC	ignore
+#define CFI_DEF_CFA	ignore
+#define CFI_DEF_CFA_REGISTER	ignore
+#define CFI_DEF_CFA_OFFSET	ignore
+#define CFI_ADJUST_CFA_OFFSET	ignore
+#define CFI_OFFSET	ignore
+#define CFI_REL_OFFSET	ignore
+#define CFI_REGISTER	ignore
+#define CFI_RESTORE	ignore
+#define CFI_REMEMBER_STATE ignore
+#define CFI_RESTORE_STATE ignore
+
+#endif
+
+#endif

+ 2 - 0
include/asm-i386/hw_irq.h

@@ -19,6 +19,8 @@
 
 
 struct hw_interrupt_type;
 struct hw_interrupt_type;
 
 
+#define NMI_VECTOR		0x02
+
 /*
 /*
  * Various low-level irq details needed by irq.c, process.c,
  * Various low-level irq details needed by irq.c, process.c,
  * time.c, io_apic.c and smp.c
  * time.c, io_apic.c and smp.c

+ 19 - 0
include/asm-i386/intel_arch_perfmon.h

@@ -0,0 +1,19 @@
+#ifndef X86_INTEL_ARCH_PERFMON_H
+#define X86_INTEL_ARCH_PERFMON_H 1
+
+#define MSR_ARCH_PERFMON_PERFCTR0		0xc1
+#define MSR_ARCH_PERFMON_PERFCTR1		0xc2
+
+#define MSR_ARCH_PERFMON_EVENTSEL0		0x186
+#define MSR_ARCH_PERFMON_EVENTSEL1		0x187
+
+#define ARCH_PERFMON_EVENTSEL0_ENABLE      (1 << 22)
+#define ARCH_PERFMON_EVENTSEL_INT          (1 << 20)
+#define ARCH_PERFMON_EVENTSEL_OS           (1 << 17)
+#define ARCH_PERFMON_EVENTSEL_USR          (1 << 16)
+
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL	(0x3c)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK	(0x00 << 8)
+#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT (1 << 0)
+
+#endif	/* X86_INTEL_ARCH_PERFMON_H */

+ 1 - 0
include/asm-i386/k8.h

@@ -0,0 +1 @@
+#include <asm-x86_64/k8.h>

+ 20 - 6
include/asm-i386/local.h

@@ -55,12 +55,26 @@ static __inline__ void local_sub(long i, local_t *v)
  * much more efficient than these naive implementations.  Note they take
  * much more efficient than these naive implementations.  Note they take
  * a variable, not an address.
  * a variable, not an address.
  */
  */
-#define cpu_local_read(v)	local_read(&__get_cpu_var(v))
-#define cpu_local_set(v, i)	local_set(&__get_cpu_var(v), (i))
-#define cpu_local_inc(v)	local_inc(&__get_cpu_var(v))
-#define cpu_local_dec(v)	local_dec(&__get_cpu_var(v))
-#define cpu_local_add(i, v)	local_add((i), &__get_cpu_var(v))
-#define cpu_local_sub(i, v)	local_sub((i), &__get_cpu_var(v))
+
+/* Need to disable preemption for the cpu local counters otherwise we could
+   still access a variable of a previous CPU in a non atomic way. */
+#define cpu_local_wrap_v(v)	 	\
+	({ local_t res__;		\
+	   preempt_disable(); 		\
+	   res__ = (v);			\
+	   preempt_enable();		\
+	   res__; })
+#define cpu_local_wrap(v)		\
+	({ preempt_disable();		\
+	   v;				\
+	   preempt_enable(); })		\
+
+#define cpu_local_read(v)    cpu_local_wrap_v(local_read(&__get_cpu_var(v)))
+#define cpu_local_set(v, i)  cpu_local_wrap(local_set(&__get_cpu_var(v), (i)))
+#define cpu_local_inc(v)     cpu_local_wrap(local_inc(&__get_cpu_var(v)))
+#define cpu_local_dec(v)     cpu_local_wrap(local_dec(&__get_cpu_var(v)))
+#define cpu_local_add(i, v)  cpu_local_wrap(local_add((i), &__get_cpu_var(v)))
+#define cpu_local_sub(i, v)  cpu_local_wrap(local_sub((i), &__get_cpu_var(v)))
 
 
 #define __cpu_local_inc(v)	cpu_local_inc(v)
 #define __cpu_local_inc(v)	cpu_local_inc(v)
 #define __cpu_local_dec(v)	cpu_local_dec(v)
 #define __cpu_local_dec(v)	cpu_local_dec(v)

+ 5 - 2
include/asm-i386/mach-default/mach_ipi.h

@@ -1,6 +1,9 @@
 #ifndef __ASM_MACH_IPI_H
 #ifndef __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 #define __ASM_MACH_IPI_H
 
 
+/* Avoid include hell */
+#define NMI_VECTOR 0x02
+
 void send_IPI_mask_bitmask(cpumask_t mask, int vector);
 void send_IPI_mask_bitmask(cpumask_t mask, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 void __send_IPI_shortcut(unsigned int shortcut, int vector);
 
 
@@ -13,7 +16,7 @@ static inline void send_IPI_mask(cpumask_t mask, int vector)
 
 
 static inline void __local_send_IPI_allbutself(int vector)
 static inline void __local_send_IPI_allbutself(int vector)
 {
 {
-	if (no_broadcast) {
+	if (no_broadcast || vector == NMI_VECTOR) {
 		cpumask_t mask = cpu_online_map;
 		cpumask_t mask = cpu_online_map;
 
 
 		cpu_clear(smp_processor_id(), mask);
 		cpu_clear(smp_processor_id(), mask);
@@ -24,7 +27,7 @@ static inline void __local_send_IPI_allbutself(int vector)
 
 
 static inline void __local_send_IPI_all(int vector)
 static inline void __local_send_IPI_all(int vector)
 {
 {
-	if (no_broadcast)
+	if (no_broadcast || vector == NMI_VECTOR)
 		send_IPI_mask(cpu_online_map, vector);
 		send_IPI_mask(cpu_online_map, vector);
 	else
 	else
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector);

+ 21 - 7
include/asm-i386/nmi.h

@@ -5,24 +5,38 @@
 #define ASM_NMI_H
 #define ASM_NMI_H
 
 
 #include <linux/pm.h>
 #include <linux/pm.h>
- 
+
 struct pt_regs;
 struct pt_regs;
- 
+
 typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
 typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
- 
-/** 
+
+/**
  * set_nmi_callback
  * set_nmi_callback
  *
  *
  * Set a handler for an NMI. Only one handler may be
  * Set a handler for an NMI. Only one handler may be
  * set. Return 1 if the NMI was handled.
  * set. Return 1 if the NMI was handled.
  */
  */
 void set_nmi_callback(nmi_callback_t callback);
 void set_nmi_callback(nmi_callback_t callback);
- 
-/** 
+
+/**
  * unset_nmi_callback
  * unset_nmi_callback
  *
  *
  * Remove the handler previously set.
  * Remove the handler previously set.
  */
  */
 void unset_nmi_callback(void);
 void unset_nmi_callback(void);
- 
+
+extern void setup_apic_nmi_watchdog (void);
+extern int reserve_lapic_nmi(void);
+extern void release_lapic_nmi(void);
+extern void disable_timer_nmi_watchdog(void);
+extern void enable_timer_nmi_watchdog(void);
+extern void nmi_watchdog_tick (struct pt_regs * regs);
+
+extern unsigned int nmi_watchdog;
+#define NMI_DEFAULT     -1
+#define NMI_NONE	0
+#define NMI_IO_APIC	1
+#define NMI_LOCAL_APIC	2
+#define NMI_INVALID	3
+
 #endif /* ASM_NMI_H */
 #endif /* ASM_NMI_H */

+ 2 - 1
include/asm-i386/processor.h

@@ -112,6 +112,7 @@ extern char ignore_fpu_irq;
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void identify_cpu(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern void print_cpu_info(struct cpuinfo_x86 *);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
 extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern unsigned short num_cache_leaves;
 
 
 #ifdef CONFIG_X86_HT
 #ifdef CONFIG_X86_HT
 extern void detect_ht(struct cpuinfo_x86 *c);
 extern void detect_ht(struct cpuinfo_x86 *c);
@@ -554,7 +555,7 @@ extern void prepare_to_copy(struct task_struct *tsk);
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
-void show_trace(struct task_struct *task, unsigned long *stack);
+void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack);
 
 
 unsigned long get_wchan(struct task_struct *p);
 unsigned long get_wchan(struct task_struct *p);
 
 

+ 4 - 3
include/asm-i386/thread_info.h

@@ -140,8 +140,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_SECCOMP		8	/* secure computing */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
 #define TIF_RESTORE_SIGMASK	9	/* restore signal mask in do_signal() */
-#define TIF_POLLING_NRFLAG	16	/* true if poll_idle() is polling TIF_NEED_RESCHED */
-#define TIF_MEMDIE		17
+#define TIF_MEMDIE		16
 
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
 #define _TIF_NOTIFY_RESUME	(1<<TIF_NOTIFY_RESUME)
@@ -153,7 +152,6 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
 #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
-#define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
 
 
 /* work to do on interrupt/exception return */
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK \
 #define _TIF_WORK_MASK \
@@ -170,6 +168,9 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
  * have to worry about atomic accesses.
  * have to worry about atomic accesses.
  */
  */
 #define TS_USEDFPU		0x0001	/* FPU was used by this task this quantum (SMP) */
 #define TS_USEDFPU		0x0001	/* FPU was used by this task this quantum (SMP) */
+#define TS_POLLING		0x0002	/* True if in idle loop and not sleeping */
+
+#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
 
 
 #endif /* __KERNEL__ */
 #endif /* __KERNEL__ */
 
 

+ 98 - 0
include/asm-i386/unwind.h

@@ -0,0 +1,98 @@
+#ifndef _ASM_I386_UNWIND_H
+#define _ASM_I386_UNWIND_H
+
+/*
+ * Copyright (C) 2002-2006 Novell, Inc.
+ *	Jan Beulich <jbeulich@novell.com>
+ * This code is released under version 2 of the GNU GPL.
+ */
+
+#ifdef CONFIG_STACK_UNWIND
+
+#include <linux/sched.h>
+#include <asm/fixmap.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+struct unwind_frame_info
+{
+	struct pt_regs regs;
+	struct task_struct *task;
+};
+
+#define UNW_PC(frame)        (frame)->regs.eip
+#define UNW_SP(frame)        (frame)->regs.esp
+#ifdef CONFIG_FRAME_POINTER
+#define UNW_FP(frame)        (frame)->regs.ebp
+#define FRAME_RETADDR_OFFSET 4
+#define FRAME_LINK_OFFSET    0
+#define STACK_BOTTOM(tsk)    STACK_LIMIT((tsk)->thread.esp0)
+#define STACK_TOP(tsk)       ((tsk)->thread.esp0)
+#endif
+#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
+
+#define UNW_REGISTER_INFO \
+	PTREGS_INFO(eax), \
+	PTREGS_INFO(ecx), \
+	PTREGS_INFO(edx), \
+	PTREGS_INFO(ebx), \
+	PTREGS_INFO(esp), \
+	PTREGS_INFO(ebp), \
+	PTREGS_INFO(esi), \
+	PTREGS_INFO(edi), \
+	PTREGS_INFO(eip)
+
+static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
+                                            /*const*/ struct pt_regs *regs)
+{
+	if (user_mode_vm(regs))
+		info->regs = *regs;
+	else {
+		memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
+		info->regs.esp = (unsigned long)&regs->esp;
+		info->regs.xss = __KERNEL_DS;
+	}
+}
+
+static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
+{
+	memset(&info->regs, 0, sizeof(info->regs));
+	info->regs.eip = info->task->thread.eip;
+	info->regs.xcs = __KERNEL_CS;
+	__get_user(info->regs.ebp, (long *)info->task->thread.esp);
+	info->regs.esp = info->task->thread.esp;
+	info->regs.xss = __KERNEL_DS;
+	info->regs.xds = __USER_DS;
+	info->regs.xes = __USER_DS;
+}
+
+extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
+                                               asmlinkage int (*callback)(struct unwind_frame_info *,
+                                                                          void *arg),
+                                               void *arg);
+
+static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
+{
+#if 0 /* This can only work when selector register and EFLAGS saves/restores
+         are properly annotated (and tracked in UNW_REGISTER_INFO). */
+	return user_mode_vm(&info->regs);
+#else
+	return info->regs.eip < PAGE_OFFSET
+	       || (info->regs.eip >= __fix_to_virt(FIX_VSYSCALL)
+	            && info->regs.eip < __fix_to_virt(FIX_VSYSCALL) + PAGE_SIZE)
+	       || info->regs.esp < PAGE_OFFSET;
+#endif
+}
+
+#else
+
+#define UNW_PC(frame) ((void)(frame), 0)
+
+static inline int arch_unw_user_mode(const void *info)
+{
+	return 0;
+}
+
+#endif
+
+#endif /* _ASM_I386_UNWIND_H */

+ 5 - 0
include/asm-ia64/thread_info.h

@@ -27,6 +27,7 @@ struct thread_info {
 	__u32 flags;			/* thread_info flags (see TIF_*) */
 	__u32 flags;			/* thread_info flags (see TIF_*) */
 	__u32 cpu;			/* current CPU */
 	__u32 cpu;			/* current CPU */
 	__u32 last_cpu;			/* Last CPU thread ran on */
 	__u32 last_cpu;			/* Last CPU thread ran on */
+	__u32 status;			/* Thread synchronous flags */
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	mm_segment_t addr_limit;	/* user-level address space limit */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	int preempt_count;		/* 0=premptable, <0=BUG; will also serve as bh-counter */
 	struct restart_block restart_block;
 	struct restart_block restart_block;
@@ -103,4 +104,8 @@ struct thread_info {
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
 /* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
 #define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
 #define TIF_WORK_MASK		(TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
 
 
+#define TS_POLLING		1 	/* true if in idle loop and not sleeping */
+
+#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
+
 #endif /* _ASM_IA64_THREAD_INFO_H */
 #endif /* _ASM_IA64_THREAD_INFO_H */

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini