Эх сурвалжийг харах

Merge branch 'master' into upstream

Jeff Garzik 19 жил өмнө
parent
commit
aebb1153ac
100 өөрчлөгдсөн 5187 нэмэгдсэн , 2882 устгасан
  1. 2 3
      Documentation/ABI/removed/devfs
  2. 88 0
      Documentation/ABI/testing/sysfs-power
  3. 62 61
      Documentation/DocBook/usb.tmpl
  4. 2 1
      Documentation/HOWTO
  5. 3 0
      Documentation/devices.txt
  6. 48 8
      Documentation/feature-removal-schedule.txt
  7. 9 5
      Documentation/filesystems/proc.txt
  8. 6 1
      Documentation/i2c/busses/i2c-viapro
  9. 13 2
      Documentation/i2c/i2c-stub
  10. 5 0
      Documentation/kbuild/makefiles.txt
  11. 8 3
      Documentation/kernel-parameters.txt
  12. 46 0
      Documentation/nommu-mmap.txt
  13. 253 0
      Documentation/pcieaer-howto.txt
  14. 535 190
      Documentation/power/devices.txt
  15. 51 77
      Documentation/sh/new-machine.txt
  16. 33 0
      Documentation/sh/register-banks.txt
  17. 4 7
      Documentation/usb/error-codes.txt
  18. 5 0
      Documentation/usb/usb-serial.txt
  19. 7 0
      Documentation/x86_64/boot-options.txt
  20. 99 0
      Documentation/x86_64/kernel-stacks
  21. 5 1
      Makefile
  22. 1 14
      arch/arm/mach-pxa/corgi.c
  23. 1 1
      arch/arm/plat-omap/usb.c
  24. 4 2
      arch/avr32/mm/tlb.c
  25. 20 11
      arch/i386/Kconfig
  26. 8 0
      arch/i386/Makefile
  27. 75 22
      arch/i386/boot/edd.S
  28. 2 2
      arch/i386/boot/setup.S
  29. 501 562
      arch/i386/defconfig
  30. 2 1
      arch/i386/kernel/Makefile
  31. 2 0
      arch/i386/kernel/acpi/Makefile
  32. 172 9
      arch/i386/kernel/acpi/boot.c
  33. 5 1
      arch/i386/kernel/acpi/earlyquirk.c
  34. 28 3
      arch/i386/kernel/apic.c
  35. 3 4
      arch/i386/kernel/cpu/amd.c
  36. 12 12
      arch/i386/kernel/cpu/centaur.c
  37. 4 4
      arch/i386/kernel/cpu/common.c
  38. 0 2
      arch/i386/kernel/cpu/cpu.h
  39. 20 22
      arch/i386/kernel/cpu/cyrix.c
  40. 1 2
      arch/i386/kernel/cpu/intel.c
  41. 1 1
      arch/i386/kernel/cpu/mcheck/Makefile
  42. 9 17
      arch/i386/kernel/cpu/mcheck/p4.c
  43. 180 0
      arch/i386/kernel/cpu/mcheck/therm_throt.c
  44. 4 5
      arch/i386/kernel/cpu/nexgen.c
  45. 2 2
      arch/i386/kernel/cpu/proc.c
  46. 2 2
      arch/i386/kernel/cpu/rise.c
  47. 3 4
      arch/i386/kernel/cpu/transmeta.c
  48. 1 6
      arch/i386/kernel/cpu/umc.c
  49. 19 3
      arch/i386/kernel/crash.c
  50. 74 36
      arch/i386/kernel/entry.S
  51. 67 0
      arch/i386/kernel/head.S
  52. 5 1
      arch/i386/kernel/i8259.c
  53. 67 58
      arch/i386/kernel/io_apic.c
  54. 50 90
      arch/i386/kernel/machine_kexec.c
  55. 5 3
      arch/i386/kernel/mca.c
  56. 511 263
      arch/i386/kernel/microcode.c
  57. 21 49
      arch/i386/kernel/mpparse.c
  58. 651 289
      arch/i386/kernel/nmi.c
  59. 3 11
      arch/i386/kernel/process.c
  60. 5 5
      arch/i386/kernel/ptrace.c
  61. 147 15
      arch/i386/kernel/relocate_kernel.S
  62. 0 134
      arch/i386/kernel/semaphore.c
  63. 140 253
      arch/i386/kernel/setup.c
  64. 18 1
      arch/i386/kernel/smpboot.c
  65. 2 95
      arch/i386/kernel/srat.c
  66. 0 98
      arch/i386/kernel/stacktrace.c
  67. 1 0
      arch/i386/kernel/syscall_table.S
  68. 19 4
      arch/i386/kernel/time.c
  69. 3 18
      arch/i386/kernel/topology.c
  70. 134 97
      arch/i386/kernel/traps.c
  71. 1 1
      arch/i386/kernel/tsc.c
  72. 1 1
      arch/i386/lib/Makefile
  73. 217 0
      arch/i386/lib/semaphore.S
  74. 1 0
      arch/i386/mach-generic/bigsmp.c
  75. 1 0
      arch/i386/mach-generic/es7000.c
  76. 30 30
      arch/i386/mach-generic/probe.c
  77. 1 0
      arch/i386/mach-generic/summit.c
  78. 22 52
      arch/i386/mm/discontig.c
  79. 1 1
      arch/i386/mm/extable.c
  80. 8 17
      arch/i386/mm/fault.c
  81. 1 1
      arch/i386/mm/highmem.c
  82. 12 26
      arch/i386/mm/init.c
  83. 59 29
      arch/i386/oprofile/nmi_int.c
  84. 25 10
      arch/i386/oprofile/nmi_timer_int.c
  85. 42 12
      arch/i386/oprofile/op_model_athlon.c
  86. 74 78
      arch/i386/oprofile/op_model_p4.c
  87. 53 12
      arch/i386/oprofile/op_model_ppro.c
  88. 1 0
      arch/i386/oprofile/op_x86_model.h
  89. 1 1
      arch/i386/pci/Makefile
  90. 4 0
      arch/i386/pci/common.c
  91. 16 9
      arch/i386/pci/direct.c
  92. 52 0
      arch/i386/pci/early.c
  93. 7 2
      arch/i386/pci/init.c
  94. 39 2
      arch/i386/pci/mmconfig.c
  95. 5 2
      arch/i386/pci/pci.h
  96. 11 0
      arch/ia64/Kconfig
  97. 1 1
      arch/ia64/ia32/sys_ia32.c
  98. 5 0
      arch/ia64/kernel/Makefile
  99. 2 2
      arch/ia64/kernel/entry.S
  100. 205 0
      arch/ia64/kernel/esi.c

+ 2 - 3
Documentation/ABI/obsolete/devfs → Documentation/ABI/removed/devfs

@@ -1,13 +1,12 @@
 What:		devfs
 What:		devfs
-Date:		July 2005
+Date:		July 2005 (scheduled), finally removed in kernel v2.6.18
 Contact:	Greg Kroah-Hartman <gregkh@suse.de>
 Contact:	Greg Kroah-Hartman <gregkh@suse.de>
 Description:
 Description:
 	devfs has been unmaintained for a number of years, has unfixable
 	devfs has been unmaintained for a number of years, has unfixable
 	races, contains a naming policy within the kernel that is
 	races, contains a naming policy within the kernel that is
 	against the LSB, and can be replaced by using udev.
 	against the LSB, and can be replaced by using udev.
-	The files fs/devfs/*, include/linux/devfs_fs*.h will be removed,
+	The files fs/devfs/*, include/linux/devfs_fs*.h were removed,
 	along with the the assorted devfs function calls throughout the
 	along with the the assorted devfs function calls throughout the
 	kernel tree.
 	kernel tree.
 
 
 Users:
 Users:
-

+ 88 - 0
Documentation/ABI/testing/sysfs-power

@@ -0,0 +1,88 @@
+What:		/sys/power/
+Date:		August 2006
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power directory will contain files that will
+		provide a unified interface to the power management
+		subsystem.
+
+What:		/sys/power/state
+Date:		August 2006
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/state file controls the system power state.
+		Reading from this file returns what states are supported,
+		which is hard-coded to 'standby' (Power-On Suspend), 'mem'
+		(Suspend-to-RAM), and 'disk' (Suspend-to-Disk).
+
+		Writing to this file one of these strings causes the system to
+		transition into that state. Please see the file
+		Documentation/power/states.txt for a description of each of
+		these states.
+
+What:		/sys/power/disk
+Date:		August 2006
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/disk file controls the operating mode of the
+		suspend-to-disk mechanism.  Reading from this file returns
+		the name of the method by which the system will be put to
+		sleep on the next suspend.  There are four methods supported:
+		'firmware' - means that the memory image will be saved to disk
+		by some firmware, in which case we also assume that the
+		firmware will handle the system suspend.
+		'platform' - the memory image will be saved by the kernel and
+		the system will be put to sleep by the platform driver (e.g.
+		ACPI or other PM registers).
+		'shutdown' - the memory image will be saved by the kernel and
+		the system will be powered off.
+		'reboot' - the memory image will be saved by the kernel and
+		the system will be rebooted.
+
+		The suspend-to-disk method may be chosen by writing to this
+		file one of the accepted strings:
+
+		'firmware'
+		'platform'
+		'shutdown'
+		'reboot'
+
+		It will only change to 'firmware' or 'platform' if the system
+		supports that.
+
+What:		/sys/power/image_size
+Date:		August 2006
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/image_size file controls the size of the image
+		created by the suspend-to-disk mechanism.  It can be written a
+		string representing a non-negative integer that will be used
+		as an upper limit of the image size, in bytes.  The kernel's
+		suspend-to-disk code will do its best to ensure the image size
+		will not exceed this number.  However, if it turns out to be
+		impossible, the kernel will try to suspend anyway using the
+		smallest image possible.  In particular, if "0" is written to
+		this file, the suspend image will be as small as possible.
+
+		Reading from this file will display the current image size
+		limit, which is set to 500 MB by default.
+
+What:		/sys/power/pm_trace
+Date:		August 2006
+Contact:	Rafael J. Wysocki <rjw@sisk.pl>
+Description:
+		The /sys/power/pm_trace file controls the code which saves the
+		last PM event point in the RTC across reboots, so that you can
+		debug a machine that just hangs during suspend (or more
+		commonly, during resume).  Namely, the RTC is only used to save
+		the last PM event point if this file contains '1'.  Initially
+		it contains '0' which may be changed to '1' by writing a
+		string representing a nonzero integer into it.
+
+		To use this debugging feature you should attempt to suspend
+		the machine, then reboot it and run
+
+		dmesg -s 1000000 | grep 'hash matches'
+
+		CAUTION: Using it will cause your machine's real-time (CMOS)
+		clock to be set to a random invalid time after a resume.

+ 62 - 61
Documentation/DocBook/usb.tmpl

@@ -43,59 +43,52 @@
 
 
     <para>A Universal Serial Bus (USB) is used to connect a host,
     <para>A Universal Serial Bus (USB) is used to connect a host,
     such as a PC or workstation, to a number of peripheral
     such as a PC or workstation, to a number of peripheral
-    devices.  USB uses a tree structure, with the host at the
+    devices.  USB uses a tree structure, with the host as the
     root (the system's master), hubs as interior nodes, and
     root (the system's master), hubs as interior nodes, and
-    peripheral devices as leaves (and slaves).
+    peripherals as leaves (and slaves).
     Modern PCs support several such trees of USB devices, usually
     Modern PCs support several such trees of USB devices, usually
     one USB 2.0 tree (480 Mbit/sec each) with
     one USB 2.0 tree (480 Mbit/sec each) with
     a few USB 1.1 trees (12 Mbit/sec each) that are used when you
     a few USB 1.1 trees (12 Mbit/sec each) that are used when you
     connect a USB 1.1 device directly to the machine's "root hub".
     connect a USB 1.1 device directly to the machine's "root hub".
     </para>
     </para>
 
 
-    <para>That master/slave asymmetry was designed in part for
-    ease of use.  It is not physically possible to assemble
-    (legal) USB cables incorrectly:  all upstream "to-the-host"
-    connectors are the rectangular type, matching the sockets on
-    root hubs, and the downstream type are the squarish type
-    (or they are built in to the peripheral).
-    Software doesn't need to deal with distributed autoconfiguration
-    since the pre-designated master node manages all that.
-    At the electrical level, bus protocol overhead is reduced by
-    eliminating arbitration and moving scheduling into host software.
+    <para>That master/slave asymmetry was designed-in for a number of
+    reasons, one being ease of use.  It is not physically possible to
+    assemble (legal) USB cables incorrectly:  all upstream "to the host"
+    connectors are the rectangular type (matching the sockets on
+    root hubs), and all downstream connectors are the squarish type
+    (or they are built into the peripheral).
+    Also, the host software doesn't need to deal with distributed
+    auto-configuration since the pre-designated master node manages all that.
+    And finally, at the electrical level, bus protocol overhead is reduced by
+    eliminating arbitration and moving scheduling into the host software.
     </para>
     </para>
 
 
-    <para>USB 1.0 was announced in January 1996, and was revised
+    <para>USB 1.0 was announced in January 1996 and was revised
     as USB 1.1 (with improvements in hub specification and
     as USB 1.1 (with improvements in hub specification and
     support for interrupt-out transfers) in September 1998.
     support for interrupt-out transfers) in September 1998.
-    USB 2.0 was released in April 2000, including high speed
-    transfers and transaction translating hubs (used for USB 1.1
+    USB 2.0 was released in April 2000, adding high-speed
+    transfers and transaction-translating hubs (used for USB 1.1
     and 1.0 backward compatibility).
     and 1.0 backward compatibility).
     </para>
     </para>
 
 
-    <para>USB support was added to Linux early in the 2.2 kernel series
-    shortly before the 2.3 development forked off.  Updates
-    from 2.3 were regularly folded back into 2.2 releases, bringing
-    new features such as <filename>/sbin/hotplug</filename> support,
-    more drivers, and more robustness.
-    The 2.5 kernel series continued such improvements, and also
-    worked on USB 2.0 support,
-    higher performance,
-    better consistency between host controller drivers,
-    API simplification (to make bugs less likely),
-    and providing internal "kerneldoc" documentation.
+    <para>Kernel developers added USB support to Linux early in the 2.2 kernel
+    series, shortly before 2.3 development forked.  Updates from 2.3 were
+    regularly folded back into 2.2 releases, which improved reliability and
+    brought <filename>/sbin/hotplug</filename> support as well more drivers.
+    Such improvements were continued in the 2.5 kernel series, where they added
+    USB 2.0 support, improved performance, and made the host controller drivers
+    (HCDs) more consistent.  They also simplified the API (to make bugs less
+    likely) and added internal "kerneldoc" documentation.
     </para>
     </para>
 
 
     <para>Linux can run inside USB devices as well as on
     <para>Linux can run inside USB devices as well as on
     the hosts that control the devices.
     the hosts that control the devices.
-    Because the Linux 2.x USB support evolved to support mass market
-    platforms such as Apple Macintosh or PC-compatible systems,
-    it didn't address design concerns for those types of USB systems.
-    So it can't be used inside mass-market PDAs, or other peripherals.
-    USB device drivers running inside those Linux peripherals
+    But USB device drivers running inside those peripherals
     don't do the same things as the ones running inside hosts,
     don't do the same things as the ones running inside hosts,
-    and so they've been given a different name:
-    they're called <emphasis>gadget drivers</emphasis>.
-    This document does not present gadget drivers.
+    so they've been given a different name:
+    <emphasis>gadget drivers</emphasis>.
+    This document does not cover gadget drivers.
     </para>
     </para>
 
 
     </chapter>
     </chapter>
@@ -103,17 +96,14 @@
 <chapter id="host">
 <chapter id="host">
     <title>USB Host-Side API Model</title>
     <title>USB Host-Side API Model</title>
 
 
-    <para>Within the kernel,
-    host-side drivers for USB devices talk to the "usbcore" APIs.
-    There are two types of public "usbcore" APIs, targetted at two different
-    layers of USB driver.  Those are
-    <emphasis>general purpose</emphasis> drivers, exposed through
-    driver frameworks such as block, character, or network devices;
-    and drivers that are <emphasis>part of the core</emphasis>,
-    which are involved in managing a USB bus.
-    Such core drivers include the <emphasis>hub</emphasis> driver,
-    which manages trees of USB devices, and several different kinds
-    of <emphasis>host controller driver (HCD)</emphasis>,
+    <para>Host-side drivers for USB devices talk to the "usbcore" APIs.
+    There are two.  One is intended for
+    <emphasis>general-purpose</emphasis> drivers (exposed through
+    driver frameworks), and the other is for drivers that are
+    <emphasis>part of the core</emphasis>.
+    Such core drivers include the <emphasis>hub</emphasis> driver
+    (which manages trees of USB devices) and several different kinds
+    of <emphasis>host controller drivers</emphasis>,
     which control individual busses.
     which control individual busses.
     </para>
     </para>
 
 
@@ -122,21 +112,21 @@
      
      
     <itemizedlist>
     <itemizedlist>
 
 
-	<listitem><para>USB supports four kinds of data transfer
-	(control, bulk, interrupt, and isochronous).  Two transfer
-	types use bandwidth as it's available (control and bulk),
-	while the other two types of transfer (interrupt and isochronous)
+	<listitem><para>USB supports four kinds of data transfers
+	(control, bulk, interrupt, and isochronous).  Two of them (control
+	and bulk) use bandwidth as it's available,
+	while the other two (interrupt and isochronous)
 	are scheduled to provide guaranteed bandwidth.
 	are scheduled to provide guaranteed bandwidth.
 	</para></listitem>
 	</para></listitem>
 
 
 	<listitem><para>The device description model includes one or more
 	<listitem><para>The device description model includes one or more
 	"configurations" per device, only one of which is active at a time.
 	"configurations" per device, only one of which is active at a time.
-	Devices that are capable of high speed operation must also support
-	full speed configurations, along with a way to ask about the
-	"other speed" configurations that might be used.
+	Devices that are capable of high-speed operation must also support
+	full-speed configurations, along with a way to ask about the
+	"other speed" configurations which might be used.
 	</para></listitem>
 	</para></listitem>
 
 
-	<listitem><para>Configurations have one or more "interface", each
+	<listitem><para>Configurations have one or more "interfaces", each
 	of which may have "alternate settings".  Interfaces may be
 	of which may have "alternate settings".  Interfaces may be
 	standardized by USB "Class" specifications, or may be specific to
 	standardized by USB "Class" specifications, or may be specific to
 	a vendor or device.</para>
 	a vendor or device.</para>
@@ -162,7 +152,7 @@
 	</para></listitem>
 	</para></listitem>
 
 
 	<listitem><para>The Linux USB API supports synchronous calls for
 	<listitem><para>The Linux USB API supports synchronous calls for
-	control and bulk messaging.
+	control and bulk messages.
 	It also supports asynchnous calls for all kinds of data transfer,
 	It also supports asynchnous calls for all kinds of data transfer,
 	using request structures called "URBs" (USB Request Blocks).
 	using request structures called "URBs" (USB Request Blocks).
 	</para></listitem>
 	</para></listitem>
@@ -463,14 +453,25 @@
 	    file in your Linux kernel sources.
 	    file in your Linux kernel sources.
 	    </para>
 	    </para>
 
 
-	    <para>Otherwise the main use for this file from programs
-	    is to poll() it to get notifications of usb devices
-	    as they're plugged or unplugged.
-	    To see what changed, you'd need to read the file and
-	    compare "before" and "after" contents, scan the filesystem,
-	    or see its hotplug event.
+	    <para>This file, in combination with the poll() system call, can
+	    also be used to detect when devices are added or removed:
+<programlisting>int fd;
+struct pollfd pfd;
+
+fd = open("/proc/bus/usb/devices", O_RDONLY);
+pfd = { fd, POLLIN, 0 };
+for (;;) {
+	/* The first time through, this call will return immediately. */
+	poll(&amp;pfd, 1, -1);
+
+	/* To see what's changed, compare the file's previous and current
+	   contents or scan the filesystem.  (Scanning is more precise.) */
+}</programlisting>
+	    Note that this behavior is intended to be used for informational
+	    and debug purposes.  It would be more appropriate to use programs
+	    such as udev or HAL to initialize a device or start a user-mode
+	    helper program, for instance.
 	    </para>
 	    </para>
-
 	</sect1>
 	</sect1>
 
 
 	<sect1>
 	<sect1>

+ 2 - 1
Documentation/HOWTO

@@ -358,7 +358,8 @@ Here is a list of some of the different kernel trees available:
   quilt trees:
   quilt trees:
     - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
     - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman <gregkh@suse.de>
 	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
 	kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
-
+    - x86-64, partly i386, Andi Kleen <ak@suse.de>
+        ftp.firstfloor.org:/pub/ak/x86_64/quilt/
 
 
 Bug Reporting
 Bug Reporting
 -------------
 -------------

+ 3 - 0
Documentation/devices.txt

@@ -2543,6 +2543,9 @@ Your cooperation is appreciated.
 		 64 = /dev/usb/rio500	Diamond Rio 500
 		 64 = /dev/usb/rio500	Diamond Rio 500
 		 65 = /dev/usb/usblcd	USBLCD Interface (info@usblcd.de)
 		 65 = /dev/usb/usblcd	USBLCD Interface (info@usblcd.de)
 		 66 = /dev/usb/cpad0	Synaptics cPad (mouse/LCD)
 		 66 = /dev/usb/cpad0	Synaptics cPad (mouse/LCD)
+		 67 = /dev/usb/adutux0	1st Ontrak ADU device
+		    ...
+		 76 = /dev/usb/adutux10	10th Ontrak ADU device
 		 96 = /dev/usb/hiddev0	1st USB HID device
 		 96 = /dev/usb/hiddev0	1st USB HID device
 		    ...
 		    ...
 		111 = /dev/usb/hiddev15	16th USB HID device
 		111 = /dev/usb/hiddev15	16th USB HID device

+ 48 - 8
Documentation/feature-removal-schedule.txt

@@ -6,6 +6,21 @@ be removed from this file.
 
 
 ---------------------------
 ---------------------------
 
 
+What:	/sys/devices/.../power/state
+	dev->power.power_state
+	dpm_runtime_{suspend,resume)()
+When:	July 2007
+Why:	Broken design for runtime control over driver power states, confusing
+	driver-internal runtime power management with:  mechanisms to support
+	system-wide sleep state transitions; event codes that distinguish
+	different phases of swsusp "sleep" transitions; and userspace policy
+	inputs.  This framework was never widely used, and most attempts to
+	use it were broken.  Drivers should instead be exposing domain-specific
+	interfaces either to kernel or to userspace.
+Who:	Pavel Machek <pavel@suse.cz>
+
+---------------------------
+
 What:	RAW driver (CONFIG_RAW_DRIVER)
 What:	RAW driver (CONFIG_RAW_DRIVER)
 When:	December 2005
 When:	December 2005
 Why:	declared obsolete since kernel 2.6.3
 Why:	declared obsolete since kernel 2.6.3
@@ -55,6 +70,18 @@ Who:	Mauro Carvalho Chehab <mchehab@brturbo.com.br>
 
 
 ---------------------------
 ---------------------------
 
 
+What:	sys_sysctl
+When:	January 2007
+Why:	The same information is available through /proc/sys and that is the
+	interface user space prefers to use. And there do not appear to be
+	any existing user in user space of sys_sysctl.  The additional
+	maintenance overhead of keeping a set of binary names gets
+	in the way of doing a good job of maintaining this interface.
+
+Who:	Eric Biederman <ebiederm@xmission.com>
+
+---------------------------
+
 What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
 What:	PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
 When:	November 2005
 When:	November 2005
 Files:	drivers/pcmcia/: pcmcia_ioctl.c
 Files:	drivers/pcmcia/: pcmcia_ioctl.c
@@ -202,14 +229,6 @@ Who:	Nick Piggin <npiggin@suse.de>
 
 
 ---------------------------
 ---------------------------
 
 
-What:	Support for the MIPS EV96100 evaluation board
-When:	September 2006
-Why:	Does no longer build since at least November 15, 2003, apparently
-	no userbase left.
-Who:	Ralf Baechle <ralf@linux-mips.org>
-
----------------------------
-
 What:	Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board
 What:	Support for the Momentum / PMC-Sierra Jaguar ATX evaluation board
 When:	September 2006
 When:	September 2006
 Why:	Does no longer build since quite some time, and was never popular,
 Why:	Does no longer build since quite some time, and was never popular,
@@ -294,3 +313,24 @@ Why:	The frame diverter is included in most distribution kernels, but is
 	It is not clear if anyone is still using it.
 	It is not clear if anyone is still using it.
 Who:	Stephen Hemminger <shemminger@osdl.org>
 Who:	Stephen Hemminger <shemminger@osdl.org>
 
 
+---------------------------
+
+
+What:	PHYSDEVPATH, PHYSDEVBUS, PHYSDEVDRIVER in the uevent environment
+When:	Oktober 2008
+Why:	The stacking of class devices makes these values misleading and
+	inconsistent.
+	Class devices should not carry any of these properties, and bus
+	devices have SUBSYTEM and DRIVER as a replacement.
+Who:	Kay Sievers <kay.sievers@suse.de>
+
+---------------------------
+
+What:	i2c-isa
+When:	December 2006
+Why:	i2c-isa is a non-sense and doesn't fit in the device driver
+	model. Drivers relying on it are better implemented as platform
+	drivers.
+Who:	Jean Delvare <khali@linux-fr.org>
+
+---------------------------

+ 9 - 5
Documentation/filesystems/proc.txt

@@ -1124,11 +1124,15 @@ debugging information is displayed on console.
 NMI switch that most IA32 servers have fires unknown NMI up, for example.
 NMI switch that most IA32 servers have fires unknown NMI up, for example.
 If a system hangs up, try pressing the NMI switch.
 If a system hangs up, try pressing the NMI switch.
 
 
-[NOTE]
-   This function and oprofile share a NMI callback. Therefore this function
-   cannot be enabled when oprofile is activated.
-   And NMI watchdog will be disabled when the value in this file is set to
-   non-zero.
+nmi_watchdog
+------------
+
+Enables/Disables the NMI watchdog on x86 systems.  When the value is non-zero
+the NMI watchdog is enabled and will continuously test all online cpus to
+determine whether or not they are still functioning properly.
+
+Because the NMI watchdog shares registers with oprofile, by disabling the NMI
+watchdog, oprofile may have more registers to utilize.
 
 
 
 
 2.4 /proc/sys/vm - The virtual memory subsystem
 2.4 /proc/sys/vm - The virtual memory subsystem

+ 6 - 1
Documentation/i2c/busses/i2c-viapro

@@ -7,9 +7,12 @@ Supported adapters:
   * VIA Technologies, Inc. VT82C686A/B
   * VIA Technologies, Inc. VT82C686A/B
     Datasheet: Sometimes available at the VIA website
     Datasheet: Sometimes available at the VIA website
 
 
-  * VIA Technologies, Inc. VT8231, VT8233, VT8233A, VT8235, VT8237R
+  * VIA Technologies, Inc. VT8231, VT8233, VT8233A
     Datasheet: available on request from VIA
     Datasheet: available on request from VIA
 
 
+  * VIA Technologies, Inc. VT8235, VT8237R, VT8237A, VT8251
+    Datasheet: available on request and under NDA from VIA
+
 Authors:
 Authors:
 	Kyösti Mälkki <kmalkki@cc.hut.fi>,
 	Kyösti Mälkki <kmalkki@cc.hut.fi>,
 	Mark D. Studebaker <mdsxyz123@yahoo.com>,
 	Mark D. Studebaker <mdsxyz123@yahoo.com>,
@@ -39,6 +42,8 @@ Your lspci -n listing must show one of these :
  device 1106:8235   (VT8231 function 4)
  device 1106:8235   (VT8231 function 4)
  device 1106:3177   (VT8235)
  device 1106:3177   (VT8235)
  device 1106:3227   (VT8237R)
  device 1106:3227   (VT8237R)
+ device 1106:3337   (VT8237A)
+ device 1106:3287   (VT8251)
 
 
 If none of these show up, you should look in the BIOS for settings like
 If none of these show up, you should look in the BIOS for settings like
 enable ACPI / SMBus or even USB.
 enable ACPI / SMBus or even USB.

+ 13 - 2
Documentation/i2c/i2c-stub

@@ -6,9 +6,12 @@ This module is a very simple fake I2C/SMBus driver.  It implements four
 types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, and
 types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, and
 (r/w) word data.
 (r/w) word data.
 
 
+You need to provide a chip address as a module parameter when loading
+this driver, which will then only react to SMBus commands to this address.
+
 No hardware is needed nor associated with this module.  It will accept write
 No hardware is needed nor associated with this module.  It will accept write
-quick commands to all addresses; it will respond to the other commands (also
-to all addresses) by reading from or writing to an array in memory.  It will
+quick commands to one address; it will respond to the other commands (also
+to one address) by reading from or writing to an array in memory.  It will
 also spam the kernel logs for every command it handles.
 also spam the kernel logs for every command it handles.
 
 
 A pointer register with auto-increment is implemented for all byte
 A pointer register with auto-increment is implemented for all byte
@@ -21,6 +24,11 @@ The typical use-case is like this:
 	3. load the target sensors chip driver module
 	3. load the target sensors chip driver module
 	4. observe its behavior in the kernel log
 	4. observe its behavior in the kernel log
 
 
+PARAMETERS:
+
+int chip_addr:
+	The SMBus address to emulate a chip at.
+
 CAVEATS:
 CAVEATS:
 
 
 There are independent arrays for byte/data and word/data commands.  Depending
 There are independent arrays for byte/data and word/data commands.  Depending
@@ -33,6 +41,9 @@ If the hardware for your driver has banked registers (e.g. Winbond sensors
 chips) this module will not work well - although it could be extended to
 chips) this module will not work well - although it could be extended to
 support that pretty easily.
 support that pretty easily.
 
 
+Only one chip address is supported - although this module could be
+extended to support more.
+
 If you spam it hard enough, printk can be lossy.  This module really wants
 If you spam it hard enough, printk can be lossy.  This module really wants
 something like relayfs.
 something like relayfs.
 
 

+ 5 - 0
Documentation/kbuild/makefiles.txt

@@ -421,6 +421,11 @@ more details, with real examples.
 	The second argument is optional, and if supplied will be used
 	The second argument is optional, and if supplied will be used
 	if first argument is not supported.
 	if first argument is not supported.
 
 
+    as-instr
+	as-instr checks if the assembler reports a specific instruction
+	and then outputs either option1 or option2
+	C escapes are supported in the test instruction
+
     cc-option
     cc-option
 	cc-option is used to check if $(CC) supports a given option, and not
 	cc-option is used to check if $(CC) supports a given option, and not
 	supported to use an optional second option.
 	supported to use an optional second option.

+ 8 - 3
Documentation/kernel-parameters.txt

@@ -573,8 +573,6 @@ running once the system is up.
 	gscd=		[HW,CD]
 	gscd=		[HW,CD]
 			Format: <io>
 			Format: <io>
 
 
-	gt96100eth=	[NET] MIPS GT96100 Advanced Communication Controller
-
 	gus=		[HW,OSS]
 	gus=		[HW,OSS]
 			Format: <io>,<irq>,<dma>,<dma16>
 			Format: <io>,<irq>,<dma>,<dma16>
 
 
@@ -1240,7 +1238,11 @@ running once the system is up.
 				bootloader. This is currently used on
 				bootloader. This is currently used on
 				IXP2000 systems where the bus has to be
 				IXP2000 systems where the bus has to be
 				configured a certain way for adjunct CPUs.
 				configured a certain way for adjunct CPUs.
-
+		noearly		[X86] Don't do any early type 1 scanning.
+				This might help on some broken boards which
+				machine check when some devices' config space
+				is read. But various workarounds are disabled
+				and some IOMMU drivers will not work.
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 	pcmv=		[HW,PCMCIA] BadgePAD 4
 
 
 	pd.		[PARIDE]
 	pd.		[PARIDE]
@@ -1368,6 +1370,9 @@ running once the system is up.
 			Reserves a hole at the top of the kernel virtual
 			Reserves a hole at the top of the kernel virtual
 			address space.
 			address space.
 
 
+	reset_devices	[KNL] Force drivers to reset the underlying device
+			during initialization.
+
 	resume=		[SWSUSP]
 	resume=		[SWSUSP]
 			Specify the partition device for software suspend
 			Specify the partition device for software suspend
 
 

+ 46 - 0
Documentation/nommu-mmap.txt

@@ -116,6 +116,9 @@ FURTHER NOTES ON NO-MMU MMAP
  (*) A list of all the mappings on the system is visible through /proc/maps in
  (*) A list of all the mappings on the system is visible through /proc/maps in
      no-MMU mode.
      no-MMU mode.
 
 
+ (*) A list of all the mappings in use by a process is visible through
+     /proc/<pid>/maps in no-MMU mode.
+
  (*) Supplying MAP_FIXED or a requesting a particular mapping address will
  (*) Supplying MAP_FIXED or a requesting a particular mapping address will
      result in an error.
      result in an error.
 
 
@@ -125,6 +128,49 @@ FURTHER NOTES ON NO-MMU MMAP
      error will result if they don't. This is most likely to be encountered
      error will result if they don't. This is most likely to be encountered
      with character device files, pipes, fifos and sockets.
      with character device files, pipes, fifos and sockets.
 
 
+
+==========================
+INTERPROCESS SHARED MEMORY
+==========================
+
+Both SYSV IPC SHM shared memory and POSIX shared memory is supported in NOMMU
+mode.  The former through the usual mechanism, the latter through files created
+on ramfs or tmpfs mounts.
+
+
+=======
+FUTEXES
+=======
+
+Futexes are supported in NOMMU mode if the arch supports them.  An error will
+be given if an address passed to the futex system call lies outside the
+mappings made by a process or if the mapping in which the address lies does not
+support futexes (such as an I/O chardev mapping).
+
+
+=============
+NO-MMU MREMAP
+=============
+
+The mremap() function is partially supported.  It may change the size of a
+mapping, and may move it[*] if MREMAP_MAYMOVE is specified and if the new size
+of the mapping exceeds the size of the slab object currently occupied by the
+memory to which the mapping refers, or if a smaller slab object could be used.
+
+MREMAP_FIXED is not supported, though it is ignored if there's no change of
+address and the object does not need to be moved.
+
+Shared mappings may not be moved.  Shareable mappings may not be moved either,
+even if they are not currently shared.
+
+The mremap() function must be given an exact match for base address and size of
+a previously mapped object.  It may not be used to create holes in existing
+mappings, move parts of existing mappings or resize parts of mappings.  It must
+act on a complete mapping.
+
+[*] Not currently supported.
+
+
 ============================================
 ============================================
 PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
 PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
 ============================================
 ============================================

+ 253 - 0
Documentation/pcieaer-howto.txt

@@ -0,0 +1,253 @@
+   The PCI Express Advanced Error Reporting Driver Guide HOWTO
+		T. Long Nguyen	<tom.l.nguyen@intel.com>
+		Yanmin Zhang	<yanmin.zhang@intel.com>
+				07/29/2006
+
+
+1. Overview
+
+1.1 About this guide
+
+This guide describes the basics of the PCI Express Advanced Error
+Reporting (AER) driver and provides information on how to use it, as
+well as how to enable the drivers of endpoint devices to conform with
+PCI Express AER driver.
+
+1.2 Copyright © Intel Corporation 2006.
+
+1.3 What is the PCI Express AER Driver?
+
+PCI Express error signaling can occur on the PCI Express link itself
+or on behalf of transactions initiated on the link. PCI Express
+defines two error reporting paradigms: the baseline capability and
+the Advanced Error Reporting capability. The baseline capability is
+required of all PCI Express components providing a minimum defined
+set of error reporting requirements. Advanced Error Reporting
+capability is implemented with a PCI Express advanced error reporting
+extended capability structure providing more robust error reporting.
+
+The PCI Express AER driver provides the infrastructure to support PCI
+Express Advanced Error Reporting capability. The PCI Express AER
+driver provides three basic functions:
+
+-	Gathers the comprehensive error information if errors occurred.
+-	Reports error to the users.
+-	Performs error recovery actions.
+
+AER driver only attaches root ports which support PCI-Express AER
+capability.
+
+
+2. User Guide
+
+2.1 Include the PCI Express AER Root Driver into the Linux Kernel
+
+The PCI Express AER Root driver is a Root Port service driver attached
+to the PCI Express Port Bus driver. If a user wants to use it, the driver
+has to be compiled. Option CONFIG_PCIEAER supports this capability. It
+depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
+CONFIG_PCIEAER = y.
+
+2.2 Load PCI Express AER Root Driver
+There is a case where a system has AER support in BIOS. Enabling the AER
+Root driver and having AER support in BIOS may result unpredictable
+behavior. To avoid this conflict, a successful load of the AER Root driver
+requires ACPI _OSC support in the BIOS to allow the AER Root driver to
+request for native control of AER. See the PCI FW 3.0 Specification for
+details regarding OSC usage. Currently, lots of firmwares don't provide
+_OSC support while they use PCI Express. To support such firmwares,
+forceload, a parameter of type bool, could enable AER to continue to
+be initiated although firmwares have no _OSC support. To enable the
+walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
+when booting kernel. Note that forceload=n by default.
+
+2.3 AER error output
+When a PCI-E AER error is captured, an error message will be outputed to
+console. If it's a correctable error, it is outputed as a warning.
+Otherwise, it is printed as an error. So users could choose different
+log level to filter out correctable error messages.
+
+Below shows an example.
++------ PCI-Express Device Error -----+
+Error Severity          : Uncorrected (Fatal)
+PCIE Bus Error type     : Transaction Layer
+Unsupported Request     : First
+Requester ID            : 0500
+VendorID=8086h, DeviceID=0329h, Bus=05h, Device=00h, Function=00h
+TLB Header:
+04000001 00200a03 05010000 00050100
+
+In the example, 'Requester ID' means the ID of the device who sends
+the error message to root port. Pls. refer to pci express specs for
+other fields.
+
+
+3. Developer Guide
+
+To enable AER aware support requires a software driver to configure
+the AER capability structure within its device and to provide callbacks.
+
+To support AER better, developers need understand how AER does work
+firstly.
+
+PCI Express errors are classified into two types: correctable errors
+and uncorrectable errors. This classification is based on the impacts
+of those errors, which may result in degraded performance or function
+failure.
+
+Correctable errors pose no impacts on the functionality of the
+interface. The PCI Express protocol can recover without any software
+intervention or any loss of data. These errors are detected and
+corrected by hardware. Unlike correctable errors, uncorrectable
+errors impact functionality of the interface. Uncorrectable errors
+can cause a particular transaction or a particular PCI Express link
+to be unreliable. Depending on those error conditions, uncorrectable
+errors are further classified into non-fatal errors and fatal errors.
+Non-fatal errors cause the particular transaction to be unreliable,
+but the PCI Express link itself is fully functional. Fatal errors, on
+the other hand, cause the link to be unreliable.
+
+When AER is enabled, a PCI Express device will automatically send an
+error message to the PCIE root port above it when the device captures
+an error. The Root Port, upon receiving an error reporting message,
+internally processes and logs the error message in its PCI Express
+capability structure. Error information being logged includes storing
+the error reporting agent's requestor ID into the Error Source
+Identification Registers and setting the error bits of the Root Error
+Status Register accordingly. If AER error reporting is enabled in Root
+Error Command Register, the Root Port generates an interrupt if an
+error is detected.
+
+Note that the errors as described above are related to the PCI Express
+hierarchy and links. These errors do not include any device specific
+errors because device specific errors will still get sent directly to
+the device driver.
+
+3.1 Configure the AER capability structure
+
+AER aware drivers of PCI Express component need change the device
+control registers to enable AER. They also could change AER registers,
+including mask and severity registers. Helper function
+pci_enable_pcie_error_reporting could be used to enable AER. See
+section 3.3.
+
+3.2. Provide callbacks
+
+3.2.1 callback reset_link to reset pci express link
+
+This callback is used to reset the pci express physical link when a
+fatal error happens. The root port aer service driver provides a
+default reset_link function, but different upstream ports might
+have different specifications to reset pci express link, so all
+upstream ports should provide their own reset_link functions.
+
+In struct pcie_port_service_driver, a new pointer, reset_link, is
+added.
+
+pci_ers_result_t (*reset_link) (struct pci_dev *dev);
+
+Section 3.2.2.2 provides more detailed info on when to call
+reset_link.
+
+3.2.2 PCI error-recovery callbacks
+
+The PCI Express AER Root driver uses error callbacks to coordinate
+with downstream device drivers associated with a hierarchy in question
+when performing error recovery actions.
+
+Data struct pci_driver has a pointer, err_handler, to point to
+pci_error_handlers who consists of a couple of callback function
+pointers. AER driver follows the rules defined in
+pci-error-recovery.txt except pci express specific parts (e.g.
+reset_link). Pls. refer to pci-error-recovery.txt for detailed
+definitions of the callbacks.
+
+Below sections specify when to call the error callback functions.
+
+3.2.2.1 Correctable errors
+
+Correctable errors pose no impacts on the functionality of
+the interface. The PCI Express protocol can recover without any
+software intervention or any loss of data. These errors do not
+require any recovery actions. The AER driver clears the device's
+correctable error status register accordingly and logs these errors.
+
+3.2.2.2 Non-correctable (non-fatal and fatal) errors
+
+If an error message indicates a non-fatal error, performing link reset
+at upstream is not required. The AER driver calls error_detected(dev,
+pci_channel_io_normal) to all drivers associated within a hierarchy in
+question. for example,
+EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort.
+If Upstream port A captures an AER error, the hierarchy consists of
+Downstream port B and EndPoint.
+
+A driver may return PCI_ERS_RESULT_CAN_RECOVER,
+PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
+whether it can recover or the AER driver calls mmio_enabled as next.
+
+If an error message indicates a fatal error, kernel will broadcast
+error_detected(dev, pci_channel_io_frozen) to all drivers within
+a hierarchy in question. Then, performing link reset at upstream is
+necessary. As different kinds of devices might use different approaches
+to reset link, AER port service driver is required to provide the
+function to reset link. Firstly, kernel looks for if the upstream
+component has an aer driver. If it has, kernel uses the reset_link
+callback of the aer driver. If the upstream component has no aer driver
+and the port is downstream port, we will use the aer driver of the
+root port who reports the AER error. As for upstream ports,
+they should provide their own aer service drivers with reset_link
+function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
+reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
+to mmio_enabled.
+
+3.3 helper functions
+
+3.3.1 int pci_find_aer_capability(struct pci_dev *dev);
+pci_find_aer_capability locates the PCI Express AER capability
+in the device configuration space. If the device doesn't support
+PCI-Express AER, the function returns 0.
+
+3.3.2 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+pci_enable_pcie_error_reporting enables the device to send error
+messages to root port when an error is detected. Note that devices
+don't enable the error reporting by default, so device drivers need
+call this function to enable it.
+
+3.3.3 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+pci_disable_pcie_error_reporting disables the device to send error
+messages to root port when an error is detected.
+
+3.3.4 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
+error status register.
+
+3.4 Frequent Asked Questions
+
+Q: What happens if a PCI Express device driver does not provide an
+error recovery handler (pci_driver->err_handler is equal to NULL)?
+
+A: The devices attached with the driver won't be recovered. If the
+error is fatal, kernel will print out warning messages. Please refer
+to section 3 for more information.
+
+Q: What happens if an upstream port service driver does not provide
+callback reset_link?
+
+A: Fatal error recovery will fail if the errors are reported by the
+upstream ports who are attached by the service driver.
+
+Q: How does this infrastructure deal with driver that is not PCI
+Express aware?
+
+A: This infrastructure calls the error callback functions of the
+driver when an error happens. But if the driver is not aware of
+PCI Express, the device might not report its own errors to root
+port.
+
+Q: What modifications will that driver need to make it compatible
+with the PCI Express AER Root driver?
+
+A: It could call the helper functions to enable AER in devices and
+cleanup uncorrectable status register. Pls. refer to section 3.3.
+

+ 535 - 190
Documentation/power/devices.txt

@@ -1,208 +1,553 @@
+Most of the code in Linux is device drivers, so most of the Linux power
+management code is also driver-specific.  Most drivers will do very little;
+others, especially for platforms with small batteries (like cell phones),
+will do a lot.
+
+This writeup gives an overview of how drivers interact with system-wide
+power management goals, emphasizing the models and interfaces that are
+shared by everything that hooks up to the driver model core.  Read it as
+background for the domain-specific work you'd do with any specific driver.
+
+
+Two Models for Device Power Management
+======================================
+Drivers will use one or both of these models to put devices into low-power
+states:
+
+    System Sleep model:
+	Drivers can enter low power states as part of entering system-wide
+	low-power states like "suspend-to-ram", or (mostly for systems with
+	disks) "hibernate" (suspend-to-disk).
+
+	This is something that device, bus, and class drivers collaborate on
+	by implementing various role-specific suspend and resume methods to
+	cleanly power down hardware and software subsystems, then reactivate
+	them without loss of data.
+
+	Some drivers can manage hardware wakeup events, which make the system
+	leave that low-power state.  This feature may be disabled using the
+	relevant /sys/devices/.../power/wakeup file; enabling it may cost some
+	power usage, but let the whole system enter low power states more often.
+
+    Runtime Power Management model:
+	Drivers may also enter low power states while the system is running,
+	independently of other power management activity.  Upstream drivers
+	will normally not know (or care) if the device is in some low power
+	state when issuing requests; the driver will auto-resume anything
+	that's needed when it gets a request.
+
+	This doesn't have, or need much infrastructure; it's just something you
+	should do when writing your drivers.  For example, clk_disable() unused
+	clocks as part of minimizing power drain for currently-unused hardware.
+	Of course, sometimes clusters of drivers will collaborate with each
+	other, which could involve task-specific power management.
+
+There's not a lot to be said about those low power states except that they
+are very system-specific, and often device-specific.  Also, that if enough
+drivers put themselves into low power states (at "runtime"), the effect may be
+the same as entering some system-wide low-power state (system sleep) ... and
+that synergies exist, so that several drivers using runtime pm might put the
+system into a state where even deeper power saving options are available.
+
+Most suspended devices will have quiesced all I/O:  no more DMA or irqs, no
+more data read or written, and requests from upstream drivers are no longer
+accepted.  A given bus or platform may have different requirements though.
+
+Examples of hardware wakeup events include an alarm from a real time clock,
+network wake-on-LAN packets, keyboard or mouse activity, and media insertion
+or removal (for PCMCIA, MMC/SD, USB, and so on).
+
+
+Interfaces for Entering System Sleep States
+===========================================
+Most of the programming interfaces a device driver needs to know about
+relate to that first model:  entering a system-wide low power state,
+rather than just minimizing power consumption by one device.
+
+
+Bus Driver Methods
+------------------
+The core methods to suspend and resume devices reside in struct bus_type.
+These are mostly of interest to people writing infrastructure for busses
+like PCI or USB, or because they define the primitives that device drivers
+may need to apply in domain-specific ways to their devices:
 
 
-Device Power Management
+struct bus_type {
+	...
+	int  (*suspend)(struct device *dev, pm_message_t state);
+	int  (*suspend_late)(struct device *dev, pm_message_t state);
 
 
+	int  (*resume_early)(struct device *dev);
+	int  (*resume)(struct device *dev);
+};
 
 
-Device power management encompasses two areas - the ability to save
-state and transition a device to a low-power state when the system is
-entering a low-power state; and the ability to transition a device to
-a low-power state while the system is running (and independently of
-any other power management activity). 
+Bus drivers implement those methods as appropriate for the hardware and
+the drivers using it; PCI works differently from USB, and so on.  Not many
+people write bus drivers; most driver code is a "device driver" that
+builds on top of bus-specific framework code.
+
+For more information on these driver calls, see the description later;
+they are called in phases for every device, respecting the parent-child
+sequencing in the driver model tree.  Note that as this is being written,
+only the suspend() and resume() are widely available; not many bus drivers
+leverage all of those phases, or pass them down to lower driver levels.
+
+
+/sys/devices/.../power/wakeup files
+-----------------------------------
+All devices in the driver model have two flags to control handling of
+wakeup events, which are hardware signals that can force the device and/or
+system out of a low power state.  These are initialized by bus or device
+driver code using device_init_wakeup(dev,can_wakeup).
+
+The "can_wakeup" flag just records whether the device (and its driver) can
+physically support wakeup events.  When that flag is clear, the sysfs
+"wakeup" file is empty, and device_may_wakeup() returns false.
+
+For devices that can issue wakeup events, a separate flag controls whether
+that device should try to use its wakeup mechanism.  The initial value of
+device_may_wakeup() will be true, so that the device's "wakeup" file holds
+the value "enabled".  Userspace can change that to "disabled" so that
+device_may_wakeup() returns false; or change it back to "enabled" (so that
+it returns true again).
+
+
+EXAMPLE:  PCI Device Driver Methods
+-----------------------------------
+PCI framework software calls these methods when the PCI device driver bound
+to a device device has provided them:
+
+struct pci_driver {
+	...
+	int  (*suspend)(struct pci_device *pdev, pm_message_t state);
+	int  (*suspend_late)(struct pci_device *pdev, pm_message_t state);
+
+	int  (*resume_early)(struct pci_device *pdev);
+	int  (*resume)(struct pci_device *pdev);
+};
 
 
+Drivers will implement those methods, and call PCI-specific procedures
+like pci_set_power_state(), pci_enable_wake(), pci_save_state(), and
+pci_restore_state() to manage PCI-specific mechanisms.  (PCI config space
+could be saved during driver probe, if it weren't for the fact that some
+systems rely on userspace tweaking using setpci.)  Devices are suspended
+before their bridges enter low power states, and likewise bridges resume
+before their devices.
+
+
+Upper Layers of Driver Stacks
+-----------------------------
+Device drivers generally have at least two interfaces, and the methods
+sketched above are the ones which apply to the lower level (nearer PCI, USB,
+or other bus hardware).  The network and block layers are examples of upper
+level interfaces, as is a character device talking to userspace.
+
+Power management requests normally need to flow through those upper levels,
+which often use domain-oriented requests like "blank that screen".  In
+some cases those upper levels will have power management intelligence that
+relates to end-user activity, or other devices that work in cooperation.
+
+When those interfaces are structured using class interfaces, there is a
+standard way to have the upper layer stop issuing requests to a given
+class device (and restart later):
+
+struct class {
+	...
+	int  (*suspend)(struct device *dev, pm_message_t state);
+	int  (*resume)(struct device *dev);
+};
 
 
-Methods
+Those calls are issued in specific phases of the process by which the
+system enters a low power "suspend" state, or resumes from it.
+
+
+Calling Drivers to Enter System Sleep States
+============================================
+When the system enters a low power state, each device's driver is asked
+to suspend the device by putting it into state compatible with the target
+system state.  That's usually some version of "off", but the details are
+system-specific.  Also, wakeup-enabled devices will usually stay partly
+functional in order to wake the system.
+
+When the system leaves that low power state, the device's driver is asked
+to resume it.  The suspend and resume operations always go together, and
+both are multi-phase operations.
+
+For simple drivers, suspend might quiesce the device using the class code
+and then turn its hardware as "off" as possible with late_suspend.  The
+matching resume calls would then completely reinitialize the hardware
+before reactivating its class I/O queues.
+
+More power-aware drivers drivers will use more than one device low power
+state, either at runtime or during system sleep states, and might trigger
+system wakeup events.
+
+
+Call Sequence Guarantees
+------------------------
+To ensure that bridges and similar links needed to talk to a device are
+available when the device is suspended or resumed, the device tree is
+walked in a bottom-up order to suspend devices.  A top-down order is
+used to resume those devices.
+
+The ordering of the device tree is defined by the order in which devices
+get registered:  a child can never be registered, probed or resumed before
+its parent; and can't be removed or suspended after that parent.
+
+The policy is that the device tree should match hardware bus topology.
+(Or at least the control bus, for devices which use multiple busses.)
+
+
+Suspending Devices
+------------------
+Suspending a given device is done in several phases.  Suspending the
+system always includes every phase, executing calls for every device
+before the next phase begins.  Not all busses or classes support all
+these callbacks; and not all drivers use all the callbacks.
+
+The phases are seen by driver notifications issued in this order:
+
+   1	class.suspend(dev, message) is called after tasks are frozen, for
+	devices associated with a class that has such a method.  This
+	method may sleep.
+
+	Since I/O activity usually comes from such higher layers, this is
+	a good place to quiesce all drivers of a given type (and keep such
+	code out of those drivers).
+
+   2	bus.suspend(dev, message) is called next.  This method may sleep,
+	and is often morphed into a device driver call with bus-specific
+	parameters and/or rules.
+
+	This call should handle parts of device suspend logic that require
+	sleeping.  It probably does work to quiesce the device which hasn't
+	been abstracted into class.suspend() or bus.suspend_late().
+
+   3	bus.suspend_late(dev, message) is called with IRQs disabled, and
+	with only one CPU active.  Until the bus.resume_early() phase
+	completes (see later), IRQs are not enabled again.  This method
+	won't be exposed by all busses; for message based busses like USB,
+	I2C, or SPI, device interactions normally require IRQs.  This bus
+	call may be morphed into a driver call with bus-specific parameters.
+
+	This call might save low level hardware state that might otherwise
+	be lost in the upcoming low power state, and actually put the
+	device into a low power state ... so that in some cases the device
+	may stay partly usable until this late.  This "late" call may also
+	help when coping with hardware that behaves badly.
+
+The pm_message_t parameter is currently used to refine those semantics
+(described later).
+
+At the end of those phases, drivers should normally have stopped all I/O
+transactions (DMA, IRQs), saved enough state that they can re-initialize
+or restore previous state (as needed by the hardware), and placed the
+device into a low-power state.  On many platforms they will also use
+clk_disable() to gate off one or more clock sources; sometimes they will
+also switch off power supplies, or reduce voltages.  Drivers which have
+runtime PM support may already have performed some or all of the steps
+needed to prepare for the upcoming system sleep state.
+
+When any driver sees that its device_can_wakeup(dev), it should make sure
+to use the relevant hardware signals to trigger a system wakeup event.
+For example, enable_irq_wake() might identify GPIO signals hooked up to
+a switch or other external hardware, and pci_enable_wake() does something
+similar for PCI's PME# signal.
+
+If a driver (or bus, or class) fails it suspend method, the system won't
+enter the desired low power state; it will resume all the devices it's
+suspended so far.
+
+Note that drivers may need to perform different actions based on the target
+system lowpower/sleep state.  At this writing, there are only platform
+specific APIs through which drivers could determine those target states.
+
+
+Device Low Power (suspend) States
+---------------------------------
+Device low-power states aren't very standard.  One device might only handle
+"on" and "off, while another might support a dozen different versions of
+"on" (how many engines are active?), plus a state that gets back to "on"
+faster than from a full "off".
+
+Some busses define rules about what different suspend states mean.  PCI
+gives one example:  after the suspend sequence completes, a non-legacy
+PCI device may not perform DMA or issue IRQs, and any wakeup events it
+issues would be issued through the PME# bus signal.  Plus, there are
+several PCI-standard device states, some of which are optional.
+
+In contrast, integrated system-on-chip processors often use irqs as the
+wakeup event sources (so drivers would call enable_irq_wake) and might
+be able to treat DMA completion as a wakeup event (sometimes DMA can stay
+active too, it'd only be the CPU and some peripherals that sleep).
+
+Some details here may be platform-specific.  Systems may have devices that
+can be fully active in certain sleep states, such as an LCD display that's
+refreshed using DMA while most of the system is sleeping lightly ... and
+its frame buffer might even be updated by a DSP or other non-Linux CPU while
+the Linux control processor stays idle.
+
+Moreover, the specific actions taken may depend on the target system state.
+One target system state might allow a given device to be very operational;
+another might require a hard shut down with re-initialization on resume.
+And two different target systems might use the same device in different
+ways; the aforementioned LCD might be active in one product's "standby",
+but a different product using the same SOC might work differently.
+
+
+Meaning of pm_message_t.event
+-----------------------------
+Parameters to suspend calls include the device affected and a message of
+type pm_message_t, which has one field:  the event.  If driver does not
+recognize the event code, suspend calls may abort the request and return
+a negative errno.  However, most drivers will be fine if they implement
+PM_EVENT_SUSPEND semantics for all messages.
+
+The event codes are used to refine the goal of suspending the device, and
+mostly matter when creating or resuming system memory image snapshots, as
+used with suspend-to-disk:
+
+    PM_EVENT_SUSPEND -- quiesce the driver and put hardware into a low-power
+	state.  When used with system sleep states like "suspend-to-RAM" or
+	"standby", the upcoming resume() call will often be able to rely on
+	state kept in hardware, or issue system wakeup events.  When used
+	instead with suspend-to-disk, few devices support this capability;
+	most are completely powered off.
+
+    PM_EVENT_FREEZE -- quiesce the driver, but don't necessarily change into
+	any low power mode.  A system snapshot is about to be taken, often
+	followed by a call to the driver's resume() method.  Neither wakeup
+	events nor DMA are allowed.
+
+    PM_EVENT_PRETHAW -- quiesce the driver, knowing that the upcoming resume()
+	will restore a suspend-to-disk snapshot from a different kernel image.
+	Drivers that are smart enough to look at their hardware state during
+	resume() processing need that state to be correct ... a PRETHAW could
+	be used to invalidate that state (by resetting the device), like a
+	shutdown() invocation would before a kexec() or system halt.  Other
+	drivers might handle this the same way as PM_EVENT_FREEZE.  Neither
+	wakeup events nor DMA are allowed.
+
+To enter "standby" (ACPI S1) or "Suspend to RAM" (STR, ACPI S3) states, or
+the similarly named APM states, only PM_EVENT_SUSPEND is used; for "Suspend
+to Disk" (STD, hibernate, ACPI S4), all of those event codes are used.
+
+There's also PM_EVENT_ON, a value which never appears as a suspend event
+but is sometimes used to record the "not suspended" device state.
+
+
+Resuming Devices
+----------------
+Resuming is done in multiple phases, much like suspending, with all
+devices processing each phase's calls before the next phase begins.
+
+The phases are seen by driver notifications issued in this order:
+
+   1	bus.resume_early(dev) is called with IRQs disabled, and with
+   	only one CPU active.  As with bus.suspend_late(), this method
+	won't be supported on busses that require IRQs in order to
+	interact with devices.
+
+	This reverses the effects of bus.suspend_late().
+
+   2	bus.resume(dev) is called next.  This may be morphed into a device
+   	driver call with bus-specific parameters; implementations may sleep.
+
+	This reverses the effects of bus.suspend().
+
+   3	class.resume(dev) is called for devices associated with a class
+	that has such a method.  Implementations may sleep.
+
+	This reverses the effects of class.suspend(), and would usually
+	reactivate the device's I/O queue.
+
+At the end of those phases, drivers should normally be as functional as
+they were before suspending:  I/O can be performed using DMA and IRQs, and
+the relevant clocks are gated on.  The device need not be "fully on"; it
+might be in a runtime lowpower/suspend state that acts as if it were.
+
+However, the details here may again be platform-specific.  For example,
+some systems support multiple "run" states, and the mode in effect at
+the end of resume() might not be the one which preceded suspension.
+That means availability of certain clocks or power supplies changed,
+which could easily affect how a driver works.
+
+
+Drivers need to be able to handle hardware which has been reset since the
+suspend methods were called, for example by complete reinitialization.
+This may be the hardest part, and the one most protected by NDA'd documents
+and chip errata.  It's simplest if the hardware state hasn't changed since
+the suspend() was called, but that can't always be guaranteed.
+
+Drivers must also be prepared to notice that the device has been removed
+while the system was powered off, whenever that's physically possible.
+PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
+where common Linux platforms will see such removal.  Details of how drivers
+will notice and handle such removals are currently bus-specific, and often
+involve a separate thread.
 
 
-The methods to suspend and resume devices reside in struct bus_type: 
 
 
-struct bus_type {
-       ...
-       int             (*suspend)(struct device * dev, pm_message_t state);
-       int             (*resume)(struct device * dev);
-};
+Note that the bus-specific runtime PM wakeup mechanism can exist, and might
+be defined to share some of the same driver code as for system wakeup.  For
+example, a bus-specific device driver's resume() method might be used there,
+so it wouldn't only be called from bus.resume() during system-wide wakeup.
+See bus-specific information about how runtime wakeup events are handled.
 
 
-Each bus driver is responsible implementing these methods, translating
-the call into a bus-specific request and forwarding the call to the
-bus-specific drivers. For example, PCI drivers implement suspend() and
-resume() methods in struct pci_driver. The PCI core is simply
-responsible for translating the pointers to PCI-specific ones and
-calling the low-level driver.
-
-This is done to a) ease transition to the new power management methods
-and leverage the existing PM code in various bus drivers; b) allow
-buses to implement generic and default PM routines for devices, and c)
-make the flow of execution obvious to the reader. 
-
-
-System Power Management
-
-When the system enters a low-power state, the device tree is walked in
-a depth-first fashion to transition each device into a low-power
-state. The ordering of the device tree is guaranteed by the order in
-which devices get registered - children are never registered before
-their ancestors, and devices are placed at the back of the list when
-registered. By walking the list in reverse order, we are guaranteed to
-suspend devices in the proper order. 
-
-Devices are suspended once with interrupts enabled. Drivers are
-expected to stop I/O transactions, save device state, and place the
-device into a low-power state. Drivers may sleep, allocate memory,
-etc. at will. 
-
-Some devices are broken and will inevitably have problems powering
-down or disabling themselves with interrupts enabled. For these
-special cases, they may return -EAGAIN. This will put the device on a
-list to be taken care of later. When interrupts are disabled, before
-we enter the low-power state, their drivers are called again to put
-their device to sleep. 
-
-On resume, the devices that returned -EAGAIN will be called to power
-themselves back on with interrupts disabled. Once interrupts have been
-re-enabled, the rest of the drivers will be called to resume their
-devices. On resume, a driver is responsible for powering back on each
-device, restoring state, and re-enabling I/O transactions for that
-device. 
 
 
+System Devices
+--------------
 System devices follow a slightly different API, which can be found in
 System devices follow a slightly different API, which can be found in
 
 
 	include/linux/sysdev.h
 	include/linux/sysdev.h
 	drivers/base/sys.c
 	drivers/base/sys.c
 
 
-System devices will only be suspended with interrupts disabled, and
-after all other devices have been suspended. On resume, they will be
-resumed before any other devices, and also with interrupts disabled.
+System devices will only be suspended with interrupts disabled, and after
+all other devices have been suspended.  On resume, they will be resumed
+before any other devices, and also with interrupts disabled.
 
 
+That is, IRQs are disabled, the suspend_late() phase begins, then the
+sysdev_driver.suspend() phase, and the system enters a sleep state.  Then
+the sysdev_driver.resume() phase begins, followed by the resume_early()
+phase, after which IRQs are enabled.
 
 
-Runtime Power Management
-
-Many devices are able to dynamically power down while the system is
-still running. This feature is useful for devices that are not being
-used, and can offer significant power savings on a running system. 
-
-In each device's directory, there is a 'power' directory, which
-contains at least a 'state' file. Reading from this file displays what
-power state the device is currently in. Writing to this file initiates
-a transition to the specified power state, which must be a decimal in
-the range 1-3, inclusive; or 0 for 'On'.
+Code to actually enter and exit the system-wide low power state sometimes
+involves hardware details that are only known to the boot firmware, and
+may leave a CPU running software (from SRAM or flash memory) that monitors
+the system and manages its wakeup sequence.
 
 
-The PM core will call the ->suspend() method in the bus_type object
-that the device belongs to if the specified state is not 0, or
-->resume() if it is. 
 
 
-Nothing will happen if the specified state is the same state the
-device is currently in. 
-
-If the device is already in a low-power state, and the specified state
-is another, but different, low-power state, the ->resume() method will
-first be called to power the device back on, then ->suspend() will be
-called again with the new state. 
-
-The driver is responsible for saving the working state of the device
-and putting it into the low-power state specified. If this was
-successful, it returns 0, and the device's power_state field is
-updated. 
-
-The driver must take care to know whether or not it is able to
-properly resume the device, including all step of reinitialization
-necessary. (This is the hardest part, and the one most protected by
-NDA'd documents). 
-
-The driver must also take care not to suspend a device that is
-currently in use. It is their responsibility to provide their own
-exclusion mechanisms.
-
-The runtime power transition happens with interrupts enabled. If a
-device cannot support being powered down with interrupts, it may
-return -EAGAIN (as it would during a system power management
-transition),  but it will _not_ be called again, and the transaction
-will fail.
-
-There is currently no way to know what states a device or driver
-supports a priori. This will change in the future. 
-
-pm_message_t meaning
-
-pm_message_t has two fields. event ("major"), and flags.  If driver
-does not know event code, it aborts the request, returning error. Some
-drivers may need to deal with special cases based on the actual type
-of suspend operation being done at the system level. This is why
-there are flags.
-
-Event codes are:
-
-ON -- no need to do anything except special cases like broken
-HW.
-
-# NOTIFICATION -- pretty much same as ON?
-
-FREEZE -- stop DMA and interrupts, and be prepared to reinit HW from
-scratch. That probably means stop accepting upstream requests, the
-actual policy of what to do with them being specific to a given
-driver. It's acceptable for a network driver to just drop packets
-while a block driver is expected to block the queue so no request is
-lost. (Use IDE as an example on how to do that). FREEZE requires no
-power state change, and it's expected for drivers to be able to
-quickly transition back to operating state.
-
-SUSPEND -- like FREEZE, but also put hardware into low-power state. If
-there's need to distinguish several levels of sleep, additional flag
-is probably best way to do that.
-
-Transitions are only from a resumed state to a suspended state, never
-between 2 suspended states. (ON -> FREEZE or ON -> SUSPEND can happen,
-FREEZE -> SUSPEND or SUSPEND -> FREEZE can not).
-
-All events are:
-
-[NOTE NOTE NOTE: If you are driver author, you should not care; you
-should only look at event, and ignore flags.]
-
-#Prepare for suspend -- userland is still running but we are going to
-#enter suspend state. This gives drivers chance to load firmware from
-#disk and store it in memory, or do other activities taht require
-#operating userland, ability to kmalloc GFP_KERNEL, etc... All of these
-#are forbiden once the suspend dance is started.. event = ON, flags =
-#PREPARE_TO_SUSPEND
-
-Apm standby -- prepare for APM event. Quiesce devices to make life
-easier for APM BIOS. event = FREEZE, flags = APM_STANDBY
-
-Apm suspend -- same as APM_STANDBY, but it we should probably avoid
-spinning down disks. event = FREEZE, flags = APM_SUSPEND
-
-System halt, reboot -- quiesce devices to make life easier for BIOS. event
-= FREEZE, flags = SYSTEM_HALT or SYSTEM_REBOOT
-
-System shutdown -- at least disks need to be spun down, or data may be
-lost. Quiesce devices, just to make life easier for BIOS. event =
-FREEZE, flags = SYSTEM_SHUTDOWN
-
-Kexec    -- turn off DMAs and put hardware into some state where new
-kernel can take over. event = FREEZE, flags = KEXEC
-
-Powerdown at end of swsusp -- very similar to SYSTEM_SHUTDOWN, except wake
-may need to be enabled on some devices. This actually has at least 3
-subtypes, system can reboot, enter S4 and enter S5 at the end of
-swsusp. event = FREEZE, flags = SWSUSP and one of SYSTEM_REBOOT,
-SYSTEM_SHUTDOWN, SYSTEM_S4
-
-Suspend to ram  -- put devices into low power state. event = SUSPEND,
-flags = SUSPEND_TO_RAM
-
-Freeze for swsusp snapshot -- stop DMA and interrupts. No need to put
-devices into low power mode, but you must be able to reinitialize
-device from scratch in resume method. This has two flavors, its done
-once on suspending kernel, once on resuming kernel. event = FREEZE,
-flags = DURING_SUSPEND or DURING_RESUME
-
-Device detach requested from /sys -- deinitialize device; proably same as
-SYSTEM_SHUTDOWN, I do not understand this one too much. probably event
-= FREEZE, flags = DEV_DETACH.
-
-#These are not really events sent:
-#
-#System fully on -- device is working normally; this is probably never
-#passed to suspend() method... event = ON, flags = 0
-#
-#Ready after resume -- userland is now running, again. Time to free any
-#memory you ate during prepare to suspend... event = ON, flags =
-#READY_AFTER_RESUME
-#
+Runtime Power Management
+========================
+Many devices are able to dynamically power down while the system is still
+running. This feature is useful for devices that are not being used, and
+can offer significant power savings on a running system.  These devices
+often support a range of runtime power states, which might use names such
+as "off", "sleep", "idle", "active", and so on.  Those states will in some
+cases (like PCI) be partially constrained by a bus the device uses, and will
+usually include hardware states that are also used in system sleep states.
+
+However, note that if a driver puts a device into a runtime low power state
+and the system then goes into a system-wide sleep state, it normally ought
+to resume into that runtime low power state rather than "full on".  Such
+distinctions would be part of the driver-internal state machine for that
+hardware; the whole point of runtime power management is to be sure that
+drivers are decoupled in that way from the state machine governing phases
+of the system-wide power/sleep state transitions.
+
+
+Power Saving Techniques
+-----------------------
+Normally runtime power management is handled by the drivers without specific
+userspace or kernel intervention, by device-aware use of techniques like:
+
+    Using information provided by other system layers
+	- stay deeply "off" except between open() and close()
+	- if transceiver/PHY indicates "nobody connected", stay "off"
+	- application protocols may include power commands or hints
+
+    Using fewer CPU cycles
+	- using DMA instead of PIO
+	- removing timers, or making them lower frequency
+	- shortening "hot" code paths
+	- eliminating cache misses
+	- (sometimes) offloading work to device firmware
+
+    Reducing other resource costs
+	- gating off unused clocks in software (or hardware)
+	- switching off unused power supplies
+	- eliminating (or delaying/merging) IRQs
+	- tuning DMA to use word and/or burst modes
+
+    Using device-specific low power states
+	- using lower voltages
+	- avoiding needless DMA transfers
+
+Read your hardware documentation carefully to see the opportunities that
+may be available.  If you can, measure the actual power usage and check
+it against the budget established for your project.
+
+
+Examples:  USB hosts, system timer, system CPU
+----------------------------------------------
+USB host controllers make interesting, if complex, examples.  In many cases
+these have no work to do:  no USB devices are connected, or all of them are
+in the USB "suspend" state.  Linux host controller drivers can then disable
+periodic DMA transfers that would otherwise be a constant power drain on the
+memory subsystem, and enter a suspend state.  In power-aware controllers,
+entering that suspend state may disable the clock used with USB signaling,
+saving a certain amount of power.
+
+The controller will be woken from that state (with an IRQ) by changes to the
+signal state on the data lines of a given port, for example by an existing
+peripheral requesting "remote wakeup" or by plugging a new peripheral.  The
+same wakeup mechanism usually works from "standby" sleep states, and on some
+systems also from "suspend to RAM" (or even "suspend to disk") states.
+(Except that ACPI may be involved instead of normal IRQs, on some hardware.)
+
+System devices like timers and CPUs may have special roles in the platform
+power management scheme.  For example, system timers using a "dynamic tick"
+approach don't just save CPU cycles (by eliminating needless timer IRQs),
+but they may also open the door to using lower power CPU "idle" states that
+cost more than a jiffie to enter and exit.  On x86 systems these are states
+like "C3"; note that periodic DMA transfers from a USB host controller will
+also prevent entry to a C3 state, much like a periodic timer IRQ.
+
+That kind of runtime mechanism interaction is common.  "System On Chip" (SOC)
+processors often have low power idle modes that can't be entered unless
+certain medium-speed clocks (often 12 or 48 MHz) are gated off.  When the
+drivers gate those clocks effectively, then the system idle task may be able
+to use the lower power idle modes and thereby increase battery life.
+
+If the CPU can have a "cpufreq" driver, there also may be opportunities
+to shift to lower voltage settings and reduce the power cost of executing
+a given number of instructions.  (Without voltage adjustment, it's rare
+for cpufreq to save much power; the cost-per-instruction must go down.)
+
+
+/sys/devices/.../power/state files
+==================================
+For now you can also test some of this functionality using sysfs.
+
+	DEPRECATED:  USE "power/state" ONLY FOR DRIVER TESTING, AND
+	AVOID USING dev->power.power_state IN DRIVERS.
+
+	THESE WILL BE REMOVED.  IF THE "power/state" FILE GETS REPLACED,
+	IT WILL BECOME SOMETHING COUPLED TO THE BUS OR DRIVER.
+
+In each device's directory, there is a 'power' directory, which contains
+at least a 'state' file.  The value of this field is effectively boolean,
+PM_EVENT_ON or PM_EVENT_SUSPEND.
+
+   *	Reading from this file displays a value corresponding to
+	the power.power_state.event field.  All nonzero values are
+	displayed as "2", corresponding to a low power state; zero
+	is displayed as "0", corresponding to normal operation.
+
+   *	Writing to this file initiates a transition using the
+   	specified event code number; only '0', '2', and '3' are
+	accepted (without a newline); '2' and '3' are both
+	mapped to PM_EVENT_SUSPEND.
+
+On writes, the PM core relies on that recorded event code and the device/bus
+capabilities to determine whether it uses a partial suspend() or resume()
+sequence to change things so that the recorded event corresponds to the
+numeric parameter.
+
+   -	If the bus requires the irqs-disabled suspend_late()/resume_early()
+	phases, writes fail because those operations are not supported here.
+
+   -	If the recorded value is the expected value, nothing is done.
+
+   -	If the recorded value is nonzero, the device is partially resumed,
+	using the bus.resume() and/or class.resume() methods.
+
+   -	If the target value is nonzero, the device is partially suspended,
+	using the class.suspend() and/or bus.suspend() methods and the
+	PM_EVENT_SUSPEND message.
+
+Drivers have no way to tell whether their suspend() and resume() calls
+have come through the sysfs power/state file or as part of entering a
+system sleep state, except that when accessed through sysfs the normal
+parent/child sequencing rules are ignored.  Drivers (such as bus, bridge,
+or hub drivers) which expose child devices may need to enforce those rules
+on their own.

+ 51 - 77
Documentation/sh/new-machine.txt

@@ -41,11 +41,6 @@ Board-specific code:
         |
         |
 	.. more boards here ...
 	.. more boards here ...
 
 
-It should also be noted that each board is required to have some certain
-headers. At the time of this writing, io.h is the only thing that needs
-to be provided for each board, and can generally just reference generic
-functions (with the exception of isa_port2addr).
-
 Next, for companion chips:
 Next, for companion chips:
 .
 .
 `-- arch
 `-- arch
@@ -104,12 +99,13 @@ and then populate that with sub-directories for each member of the family.
 Both the Solution Engine and the hp6xx boards are an example of this.
 Both the Solution Engine and the hp6xx boards are an example of this.
 
 
 After you have setup your new arch/sh/boards/ directory, remember that you
 After you have setup your new arch/sh/boards/ directory, remember that you
-also must add a directory in include/asm-sh for headers localized to this
-board. In order to interoperate seamlessly with the build system, it's best
-to have this directory the same as the arch/sh/boards/ directory name,
-though if your board is again part of a family, the build system has ways
-of dealing with this, and you can feel free to name the directory after
-the family member itself.
+should also add a directory in include/asm-sh for headers localized to this
+board (if there are going to be more than one). In order to interoperate
+seamlessly with the build system, it's best to have this directory the same
+as the arch/sh/boards/ directory name, though if your board is again part of
+a family, the build system has ways of dealing with this (via incdir-y
+overloading), and you can feel free to name the directory after the family
+member itself.
 
 
 There are a few things that each board is required to have, both in the
 There are a few things that each board is required to have, both in the
 arch/sh/boards and the include/asm-sh/ heirarchy. In order to better
 arch/sh/boards and the include/asm-sh/ heirarchy. In order to better
@@ -122,6 +118,7 @@ might look something like:
  * arch/sh/boards/vapor/setup.c - Setup code for imaginary board
  * arch/sh/boards/vapor/setup.c - Setup code for imaginary board
  */
  */
 #include <linux/init.h>
 #include <linux/init.h>
+#include <asm/rtc.h> /* for board_time_init() */
 
 
 const char *get_system_type(void)
 const char *get_system_type(void)
 {
 {
@@ -152,79 +149,57 @@ int __init platform_setup(void)
 }
 }
 
 
 Our new imaginary board will also have to tie into the machvec in order for it
 Our new imaginary board will also have to tie into the machvec in order for it
-to be of any use. Currently the machvec is slowly on its way out, but is still
-required for the time being. As such, let us take a look at what needs to be
-done for the machvec assignment.
+to be of any use.
 
 
 machvec functions fall into a number of categories:
 machvec functions fall into a number of categories:
 
 
  - I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
  - I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
- - I/O remapping functions (ioremap etc)
- - some initialisation functions
- - a 'heartbeat' function
- - some miscellaneous flags
-
-The tree can be built in two ways:
- - as a fully generic build. All drivers are linked in, and all functions
-   go through the machvec
- - as a machine specific build. In this case only the required drivers
-   will be linked in, and some macros may be redefined to not go through
-   the machvec where performance is important (in particular IO functions).
-
-There are three ways in which IO can be performed:
- - none at all. This is really only useful for the 'unknown' machine type,
-   which us designed to run on a machine about which we know nothing, and
-   so all all IO instructions do nothing.
- - fully custom. In this case all IO functions go to a machine specific
-   set of functions which can do what they like
- - a generic set of functions. These will cope with most situations,
-   and rely on a single function, mv_port2addr, which is called through the
-   machine vector, and converts an IO address into a memory address, which
-   can be read from/written to directly.
-
-Thus adding a new machine involves the following steps (I will assume I am
-adding a machine called vapor):
-
- - add a new file include/asm-sh/vapor/io.h which contains prototypes for
+ - I/O mapping functions (ioport_map, ioport_unmap, etc).
+ - a 'heartbeat' function.
+ - PCI and IRQ initialization routines.
+ - Consistent allocators (for boards that need special allocators,
+   particularly for allocating out of some board-specific SRAM for DMA
+   handles).
+
+There are machvec functions added and removed over time, so always be sure to
+consult include/asm-sh/machvec.h for the current state of the machvec.
+
+The kernel will automatically wrap in generic routines for undefined function
+pointers in the machvec at boot time, as machvec functions are referenced
+unconditionally throughout most of the tree. Some boards have incredibly
+sparse machvecs (such as the dreamcast and sh03), whereas others must define
+virtually everything (rts7751r2d).
+
+Adding a new machine is relatively trivial (using vapor as an example):
+
+If the board-specific definitions are quite minimalistic, as is the case for
+the vast majority of boards, simply having a single board-specific header is
+sufficient.
+
+ - add a new file include/asm-sh/vapor.h which contains prototypes for
    any machine specific IO functions prefixed with the machine name, for
    any machine specific IO functions prefixed with the machine name, for
    example vapor_inb. These will be needed when filling out the machine
    example vapor_inb. These will be needed when filling out the machine
    vector.
    vector.
 
 
-   This is the minimum that is required, however there are ample
-   opportunities to optimise this. In particular, by making the prototypes
-   inline function definitions, it is possible to inline the function when
-   building machine specific versions. Note that the machine vector
-   functions will still be needed, so that a module built for a generic
-   setup can be loaded.
-
- - add a new file arch/sh/boards/vapor/mach.c. This contains the definition
-   of the machine vector. When building the machine specific version, this
-   will be the real machine vector (via an alias), while in the generic
-   version is used to initialise the machine vector, and then freed, by
-   making it initdata. This should be defined as:
-
-     struct sh_machine_vector mv_vapor __initmv = {
-       .mv_name = "vapor",
-     }
-     ALIAS_MV(vapor)
-
- - finally add a file arch/sh/boards/vapor/io.c, which contains
-   definitions of the machine specific io functions.
-
-A note about initialisation functions. Three initialisation functions are
-provided in the machine vector:
- - mv_arch_init - called very early on from setup_arch
- - mv_init_irq - called from init_IRQ, after the generic SH interrupt
-   initialisation
- - mv_init_pci - currently not used
-
-Any other remaining functions which need to be called at start up can be
-added to the list using the __initcalls macro (or module_init if the code
-can be built as a module). Many generic drivers probe to see if the device
-they are targeting is present, however this may not always be appropriate,
-so a flag can be added to the machine vector which will be set on those
-machines which have the hardware in question, reducing the probe to a
-single conditional.
+   Note that these prototypes are generated automatically by setting
+   __IO_PREFIX to something sensible. A typical example would be:
+
+	#define __IO_PREFIX vapor
+   	#include <asm/io_generic.h>
+
+   somewhere in the board-specific header. Any boards being ported that still
+   have a legacy io.h should remove it entirely and switch to the new model.
+
+ - Add machine vector definitions to the board's setup.c. At a bare minimum,
+   this must be defined as something like:
+
+	struct sh_machine_vector mv_vapor __initmv = {
+		.mv_name = "vapor",
+	};
+	ALIAS_MV(vapor)
+
+ - finally add a file arch/sh/boards/vapor/io.c, which contains definitions of
+   the machine specific io functions (if there are enough to warrant it).
 
 
 3. Hooking into the Build System
 3. Hooking into the Build System
 ================================
 ================================
@@ -303,4 +278,3 @@ which will in turn copy the defconfig for this board, run it through
 oldconfig (prompting you for any new options since the time of creation),
 oldconfig (prompting you for any new options since the time of creation),
 and start you on your way to having a functional kernel for your new
 and start you on your way to having a functional kernel for your new
 board.
 board.
-

+ 33 - 0
Documentation/sh/register-banks.txt

@@ -0,0 +1,33 @@
+	Notes on register bank usage in the kernel
+	==========================================
+
+Introduction
+------------
+
+The SH-3 and SH-4 CPU families traditionally include a single partial register
+bank (selected by SR.RB, only r0 ... r7 are banked), whereas other families
+may have more full-featured banking or simply no such capabilities at all.
+
+SR.RB banking
+-------------
+
+In the case of this type of banking, banked registers are mapped directly to
+r0 ... r7 if SR.RB is set to the bank we are interested in, otherwise ldc/stc
+can still be used to reference the banked registers (as r0_bank ... r7_bank)
+when in the context of another bank. The developer must keep the SR.RB value
+in mind when writing code that utilizes these banked registers, for obvious
+reasons. Userspace is also not able to poke at the bank1 values, so these can
+be used rather effectively as scratch registers by the kernel.
+
+Presently the kernel uses several of these registers.
+
+	- r0_bank, r1_bank (referenced as k0 and k1, used for scratch
+	  registers when doing exception handling).
+	- r2_bank (used to track the EXPEVT/INTEVT code)
+		- Used by do_IRQ() and friends for doing irq mapping based off
+		  of the interrupt exception vector jump table offset
+	- r6_bank (global interrupt mask)
+		- The SR.IMASK interrupt handler makes use of this to set the
+		  interrupt priority level (used by local_irq_enable())
+	- r7_bank (current)
+

+ 4 - 7
Documentation/usb/error-codes.txt

@@ -98,13 +98,13 @@ one or more packets could finish before an error stops further endpoint I/O.
 			error, a failure to respond (often caused by
 			error, a failure to respond (often caused by
 			device disconnect), or some other fault.
 			device disconnect), or some other fault.
 
 
--ETIMEDOUT (**)		No response packet received within the prescribed
+-ETIME (**)		No response packet received within the prescribed
 			bus turn-around time.  This error may instead be
 			bus turn-around time.  This error may instead be
 			reported as -EPROTO or -EILSEQ.
 			reported as -EPROTO or -EILSEQ.
 
 
-			Note that the synchronous USB message functions
-			also use this code to indicate timeout expired
-			before the transfer completed.
+-ETIMEDOUT		Synchronous USB message functions use this code
+			to indicate timeout expired before the transfer
+			completed, and no other error was reported by HC.
 
 
 -EPIPE (**)		Endpoint stalled.  For non-control endpoints,
 -EPIPE (**)		Endpoint stalled.  For non-control endpoints,
 			reset this status with usb_clear_halt().
 			reset this status with usb_clear_halt().
@@ -163,6 +163,3 @@ usb_get_*/usb_set_*():
 usb_control_msg():
 usb_control_msg():
 usb_bulk_msg():
 usb_bulk_msg():
 -ETIMEDOUT		Timeout expired before the transfer completed.
 -ETIMEDOUT		Timeout expired before the transfer completed.
-			In the future this code may change to -ETIME,
-			whose definition is a closer match to this sort
-			of error.

+ 5 - 0
Documentation/usb/usb-serial.txt

@@ -433,6 +433,11 @@ Options supported:
   See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
   See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
   information on this driver.
   information on this driver.
 
 
+AIRcable USB Dongle Bluetooth driver
+  If there is the cdc_acm driver loaded in the system, you will find that the
+  cdc_acm claims the device before AIRcable can. This is simply corrected
+  by unloading both modules and then loading the aircable module before
+  cdc_acm module
 
 
 Generic Serial driver
 Generic Serial driver
 
 

+ 7 - 0
Documentation/x86_64/boot-options.txt

@@ -245,6 +245,13 @@ Debugging
 		newfallback: use new unwinder but fall back to old if it gets
 		newfallback: use new unwinder but fall back to old if it gets
 			stuck (default)
 			stuck (default)
 
 
+  call_trace=[old|both|newfallback|new]
+		old: use old inexact backtracer
+		new: use new exact dwarf2 unwinder
+ 		both: print entries from both
+		newfallback: use new unwinder but fall back to old if it gets
+			stuck (default)
+
 Misc
 Misc
 
 
   noreplacement  Don't replace instructions with more appropriate ones
   noreplacement  Don't replace instructions with more appropriate ones

+ 99 - 0
Documentation/x86_64/kernel-stacks

@@ -0,0 +1,99 @@
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread.  These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie. While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each cpu.  These stacks are only used while the kernel
+is in control on that cpu, when a cpu returns to user space the
+specialized stacks contain no useful data.  The main cpu stacks is
+
+* Interrupt stack.  IRQSTACKSIZE
+
+  Used for external hardware interrupts.  If this is the first external
+  hardware interrupt (i.e. not a nested hardware interrupt) then the
+  kernel switches from the current task to the interrupt stack.  Like
+  the split thread and interrupt stacks on i386 (with CONFIG_4KSTACKS),
+  this gives more room for kernel interrupt processing without having
+  to increase the size of every per thread stack.
+
+  The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter. This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64.  This feature is called the Interrupt Stack Table
+(IST).  There can be up to 7 IST entries per cpu. The IST code is an
+index into the Task State Segment (TSS), the IST entries in the TSS
+point to dedicated stacks, each stack can be a different size.
+
+An IST is selected by an non-zero value in the IST field of an
+interrupt-gate descriptor.  When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler.  If
+software wants to allow nested IST interrupts then the handler must
+adjust the IST values on entry to and exit from the interrupt handler.
+(this is occasionally done, e.g. for debug exceptions)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested.  For example, a debug interrupt can safely be interrupted by an
+NMI.  arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested.  However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are :-
+
+* STACKFAULT_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 12 - Stack Fault Exception (#SS).
+
+  This allows to recover from invalid stack segments. Rarely
+  happens.
+
+* DOUBLEFAULT_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 8 - Double Fault Exception (#DF).
+
+  Invoked when handling a exception causes another exception. Happens
+  when the kernel is very confused (e.g. kernel stack pointer corrupt)
+  Using a separate stack allows to recover from it well enough in many
+  cases to still output an oops.
+
+* NMI_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for non-maskable interrupts (NMI).
+
+  NMI can be delivered at any time, including when the kernel is in the
+  middle of switching stacks.  Using IST for NMI events avoids making
+  assumptions about the previous state of the kernel stack.
+
+* DEBUG_STACK.  DEBUG_STKSZ
+
+  Used for hardware debug interrupts (interrupt 1) and for software
+  debug interrupts (INT3).
+
+  When debugging a kernel, debug interrupts (both hardware and
+  software) can occur at any time.  Using IST for these interrupts
+  avoids making assumptions about the previous state of the kernel
+  stack.
+
+* MCE_STACK.  EXCEPTION_STKSZ (PAGE_SIZE).
+
+  Used for interrupt 18 - Machine Check Exception (#MC).
+
+  MCE can be delivered at any time, including when the kernel is in the
+  middle of switching stacks.  Using IST for MCE events avoids making
+  assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.

+ 5 - 1
Makefile

@@ -1385,9 +1385,13 @@ endif #ifeq ($(config-targets),1)
 endif #ifeq ($(mixed-targets),1)
 endif #ifeq ($(mixed-targets),1)
 
 
 PHONY += checkstack kernelrelease kernelversion
 PHONY += checkstack kernelrelease kernelversion
+
+# Use $(SUBARCH) here instead of $(ARCH) so that this works for UML.
+# In the UML case, $(SUBARCH) is the name of the underlying
+# architecture, while for all other arches, it is the same as $(ARCH).
 checkstack:
 checkstack:
 	$(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
 	$(OBJDUMP) -d vmlinux $$(find . -name '*.ko') | \
-	$(PERL) $(src)/scripts/checkstack.pl $(ARCH)
+	$(PERL) $(src)/scripts/checkstack.pl $(SUBARCH)
 
 
 kernelrelease:
 kernelrelease:
 	$(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \
 	$(if $(wildcard include/config/kernel.release), $(Q)echo $(KERNELRELEASE), \

+ 1 - 14
arch/arm/mach-pxa/corgi.c

@@ -284,21 +284,9 @@ static struct pxaficp_platform_data corgi_ficp_platform_data = {
 /*
 /*
  * USB Device Controller
  * USB Device Controller
  */
  */
-static void corgi_udc_command(int cmd)
-{
-	switch(cmd)	{
-	case PXA2XX_UDC_CMD_CONNECT:
-		GPSR(CORGI_GPIO_USB_PULLUP) = GPIO_bit(CORGI_GPIO_USB_PULLUP);
-		break;
-	case PXA2XX_UDC_CMD_DISCONNECT:
-		GPCR(CORGI_GPIO_USB_PULLUP) = GPIO_bit(CORGI_GPIO_USB_PULLUP);
-		break;
-	}
-}
-
 static struct pxa2xx_udc_mach_info udc_info __initdata = {
 static struct pxa2xx_udc_mach_info udc_info __initdata = {
 	/* no connect GPIO; corgi can't tell connection status */
 	/* no connect GPIO; corgi can't tell connection status */
-	.udc_command		= corgi_udc_command,
+	.gpio_pullup		= CORGI_GPIO_USB_PULLUP,
 };
 };
 
 
 
 
@@ -350,7 +338,6 @@ static void __init corgi_init(void)
 	corgi_ssp_set_machinfo(&corgi_ssp_machinfo);
 	corgi_ssp_set_machinfo(&corgi_ssp_machinfo);
 
 
 	pxa_gpio_mode(CORGI_GPIO_IR_ON | GPIO_OUT);
 	pxa_gpio_mode(CORGI_GPIO_IR_ON | GPIO_OUT);
-	pxa_gpio_mode(CORGI_GPIO_USB_PULLUP | GPIO_OUT);
 	pxa_gpio_mode(CORGI_GPIO_HSYNC | GPIO_IN);
 	pxa_gpio_mode(CORGI_GPIO_HSYNC | GPIO_IN);
 
 
  	pxa_set_udc_info(&udc_info);
  	pxa_set_udc_info(&udc_info);

+ 1 - 1
arch/arm/plat-omap/usb.c

@@ -26,7 +26,7 @@
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/platform_device.h>
-#include <linux/usb_otg.h>
+#include <linux/usb/otg.h>
 
 
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq.h>

+ 4 - 2
arch/avr32/mm/tlb.c

@@ -15,7 +15,8 @@
 
 
 void show_dtlb_entry(unsigned int index)
 void show_dtlb_entry(unsigned int index)
 {
 {
-	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
+	unsigned long flags;
 
 
 	local_irq_save(flags);
 	local_irq_save(flags);
 	mmucr_save = sysreg_read(MMUCR);
 	mmucr_save = sysreg_read(MMUCR);
@@ -305,7 +306,8 @@ static void tlb_stop(struct seq_file *tlb, void *v)
 
 
 static int tlb_show(struct seq_file *tlb, void *v)
 static int tlb_show(struct seq_file *tlb, void *v)
 {
 {
-	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save, flags;
+	unsigned int tlbehi, tlbehi_save, tlbelo, mmucr, mmucr_save;
+	unsigned long flags;
 	unsigned long *index = v;
 	unsigned long *index = v;
 
 
 	if (*index == 0)
 	if (*index == 0)

+ 20 - 11
arch/i386/Kconfig

@@ -166,7 +166,6 @@ config X86_VISWS
 
 
 config X86_GENERICARCH
 config X86_GENERICARCH
        bool "Generic architecture (Summit, bigsmp, ES7000, default)"
        bool "Generic architecture (Summit, bigsmp, ES7000, default)"
-       depends on SMP
        help
        help
           This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
           This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
 	  It is intended for a generic binary kernel.
 	  It is intended for a generic binary kernel.
@@ -263,7 +262,7 @@ source "kernel/Kconfig.preempt"
 
 
 config X86_UP_APIC
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
 	bool "Local APIC support on uniprocessors"
-	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+	depends on !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
 	help
 	help
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -288,12 +287,12 @@ config X86_UP_IOAPIC
 
 
 config X86_LOCAL_APIC
 config X86_LOCAL_APIC
 	bool
 	bool
-	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+	depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH
 	default y
 	default y
 
 
 config X86_IO_APIC
 config X86_IO_APIC
 	bool
 	bool
-	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+	depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH
 	default y
 	default y
 
 
 config X86_VISWS_APIC
 config X86_VISWS_APIC
@@ -402,6 +401,7 @@ config X86_REBOOTFIXUPS
 
 
 config MICROCODE
 config MICROCODE
 	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
 	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+	select FW_LOADER
 	---help---
 	---help---
 	  If you say Y here and also to "/dev file system support" in the
 	  If you say Y here and also to "/dev file system support" in the
 	  'File systems' section, you will be able to update the microcode on
 	  'File systems' section, you will be able to update the microcode on
@@ -417,6 +417,11 @@ config MICROCODE
 	  To compile this driver as a module, choose M here: the
 	  To compile this driver as a module, choose M here: the
 	  module will be called microcode.
 	  module will be called microcode.
 
 
+config MICROCODE_OLD_INTERFACE
+	bool
+	depends on MICROCODE
+	default y
+
 config X86_MSR
 config X86_MSR
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	tristate "/dev/cpu/*/msr - Model-specific register support"
 	help
 	help
@@ -599,12 +604,10 @@ config ARCH_SELECT_MEMORY_MODEL
 	def_bool y
 	def_bool y
 	depends on ARCH_SPARSEMEM_ENABLE
 	depends on ARCH_SPARSEMEM_ENABLE
 
 
-source "mm/Kconfig"
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
 
 
-config HAVE_ARCH_EARLY_PFN_TO_NID
-	bool
-	default y
-	depends on NUMA
+source "mm/Kconfig"
 
 
 config HIGHPTE
 config HIGHPTE
 	bool "Allocate 3rd-level pagetables from highmem"
 	bool "Allocate 3rd-level pagetables from highmem"
@@ -741,8 +744,7 @@ config SECCOMP
 source kernel/Kconfig.hz
 source kernel/Kconfig.hz
 
 
 config KEXEC
 config KEXEC
-	bool "kexec system call (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
+	bool "kexec system call"
 	help
 	help
 	  kexec is a system call that implements the ability to shutdown your
 	  kexec is a system call that implements the ability to shutdown your
 	  current kernel, and to start another kernel.  It is like a reboot
 	  current kernel, and to start another kernel.  It is like a reboot
@@ -763,6 +765,13 @@ config CRASH_DUMP
 	depends on HIGHMEM
 	depends on HIGHMEM
 	help
 	help
 	  Generate crash dump after being started by kexec.
 	  Generate crash dump after being started by kexec.
+          This should be normally only set in special crash dump kernels
+	  which are loaded in the main kernel with kexec-tools into
+	  a specially reserved region and then later executed after
+	  a crash by kdump/kexec. The crash dump kernel must be compiled
+          to a memory address not used by the main kernel or BIOS using
+          PHYSICAL_START.
+	  For more details see Documentation/kdump/kdump.txt
 
 
 config PHYSICAL_START
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)

+ 8 - 0
arch/i386/Makefile

@@ -46,6 +46,14 @@ cflags-y += -ffreestanding
 # a lot more stack due to the lack of sharing of stacklots:
 # a lot more stack due to the lack of sharing of stacklots:
 CFLAGS				+= $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
 CFLAGS				+= $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-option,-fno-unit-at-a-time); fi ;)
 
 
+# do binutils support CFI?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+
+# is .cfi_signal_frame supported too?
+cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
+
 CFLAGS += $(cflags-y)
 CFLAGS += $(cflags-y)
 
 
 # Default subarch .c files
 # Default subarch .c files

+ 75 - 22
arch/i386/boot/edd.S

@@ -15,42 +15,95 @@
 #include <asm/setup.h>
 #include <asm/setup.h>
 
 
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
 #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
+
+# It is assumed that %ds == INITSEG here
+
 	movb	$0, (EDD_MBR_SIG_NR_BUF)
 	movb	$0, (EDD_MBR_SIG_NR_BUF)
 	movb	$0, (EDDNR)
 	movb	$0, (EDDNR)
 
 
-# Check the command line for two options:
+# Check the command line for options:
 # edd=of  disables EDD completely  (edd=off)
 # edd=of  disables EDD completely  (edd=off)
 # edd=sk  skips the MBR test    (edd=skipmbr)
 # edd=sk  skips the MBR test    (edd=skipmbr)
+# edd=on  re-enables EDD (edd=on)
+
 	pushl	%esi
 	pushl	%esi
-    	cmpl	$0, %cs:cmd_line_ptr
-	jz	done_cl
+	movw	$edd_mbr_sig_start, %di	# Default to edd=on
+
 	movl	%cs:(cmd_line_ptr), %esi
 	movl	%cs:(cmd_line_ptr), %esi
-# ds:esi has the pointer to the command line now
-	movl	$(COMMAND_LINE_SIZE-7), %ecx
-# loop through kernel command line one byte at a time
-cl_loop:
-	cmpl	$EDD_CL_EQUALS, (%si)
+	andl	%esi, %esi
+	jz	old_cl			# Old boot protocol?
+
+# Convert to a real-mode pointer in fs:si
+	movl	%esi, %eax
+	shrl	$4, %eax
+	movw	%ax, %fs
+	andw	$0xf, %si
+	jmp	have_cl_pointer
+
+# Old-style boot protocol?
+old_cl:
+	push	%ds			# aka INITSEG
+	pop	%fs
+
+	cmpw	$0xa33f, (0x20)
+	jne	done_cl			# No command line at all?
+	movw	(0x22), %si		# Pointer relative to INITSEG
+
+# fs:si has the pointer to the command line now
+have_cl_pointer:
+
+# Loop through kernel command line one byte at a time.  Just in
+# case the loader is buggy and failed to null-terminate the command line
+# terminate if we get close enough to the end of the segment that we
+# cannot fit "edd=XX"...
+cl_atspace:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movl	%fs:(%si), %eax
+	andb	%al, %al		# End of line?
+	jz	done_cl
+	cmpl	$EDD_CL_EQUALS, %eax
 	jz	found_edd_equals
 	jz	found_edd_equals
-	incl	%esi
-	loop	cl_loop
-	jmp	done_cl
+	cmpb	$0x20, %al		# <= space consider whitespace
+	ja	cl_skipword
+	incw	%si
+	jmp	cl_atspace
+
+cl_skipword:
+	cmpw	$-5, %si		# Watch for segment wraparound
+	jae	done_cl
+	movb	%fs:(%si), %al		# End of string?
+	andb	%al, %al
+	jz	done_cl
+	cmpb	$0x20, %al
+	jbe	cl_atspace
+	incw	%si
+	jmp	cl_skipword
+
 found_edd_equals:
 found_edd_equals:
 # only looking at first two characters after equals
 # only looking at first two characters after equals
-    	addl	$4, %esi
-	cmpw	$EDD_CL_OFF, (%si)	# edd=of
-	jz	do_edd_off
-	cmpw	$EDD_CL_SKIP, (%si)	# edd=sk
-	jz	do_edd_skipmbr
-	jmp	done_cl
+# late overrides early on the command line, so keep going after finding something
+	movw	%fs:4(%si), %ax
+	cmpw	$EDD_CL_OFF, %ax	# edd=of
+	je	do_edd_off
+	cmpw	$EDD_CL_SKIP, %ax	# edd=sk
+	je	do_edd_skipmbr
+	cmpw	$EDD_CL_ON, %ax		# edd=on
+	je	do_edd_on
+	jmp	cl_skipword
 do_edd_skipmbr:
 do_edd_skipmbr:
-    	popl	%esi
-	jmp	edd_start
+	movw	$edd_start, %di
+	jmp	cl_skipword
 do_edd_off:
 do_edd_off:
-	popl	%esi
-	jmp	edd_done
+	movw	$edd_done, %di
+	jmp	cl_skipword
+do_edd_on:
+	movw	$edd_mbr_sig_start, %di
+	jmp	cl_skipword
+
 done_cl:
 done_cl:
 	popl	%esi
 	popl	%esi
-
+	jmpw	*%di
 
 
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 # Read the first sector of each BIOS disk device and store the 4-byte signature
 edd_mbr_sig_start:
 edd_mbr_sig_start:

+ 2 - 2
arch/i386/boot/setup.S

@@ -494,12 +494,12 @@ no_voyager:
 	movw	%cs, %ax			# aka SETUPSEG
 	movw	%cs, %ax			# aka SETUPSEG
 	subw	$DELTA_INITSEG, %ax		# aka INITSEG
 	subw	$DELTA_INITSEG, %ax		# aka INITSEG
 	movw	%ax, %ds
 	movw	%ax, %ds
-	movw	$0, (0x1ff)			# default is no pointing device
+	movb	$0, (0x1ff)			# default is no pointing device
 	int	$0x11				# int 0x11: equipment list
 	int	$0x11				# int 0x11: equipment list
 	testb	$0x04, %al			# check if mouse installed
 	testb	$0x04, %al			# check if mouse installed
 	jz	no_psmouse
 	jz	no_psmouse
 
 
-	movw	$0xAA, (0x1ff)			# device present
+	movb	$0xAA, (0x1ff)			# device present
 no_psmouse:
 no_psmouse:
 
 
 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
 #if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)

+ 501 - 562
arch/i386/defconfig

@@ -1,41 +1,51 @@
 #
 #
 # Automatically generated make config: don't edit
 # Automatically generated make config: don't edit
+# Linux kernel version: 2.6.18-git5
+# Tue Sep 26 09:30:47 2006
 #
 #
 CONFIG_X86_32=y
 CONFIG_X86_32=y
+CONFIG_GENERIC_TIME=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_STACKTRACE_SUPPORT=y
 CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_SEMAPHORE_SLEEPERS=y
 CONFIG_X86=y
 CONFIG_X86=y
 CONFIG_MMU=y
 CONFIG_MMU=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_ISA_DMA=y
 CONFIG_GENERIC_IOMAP=y
 CONFIG_GENERIC_IOMAP=y
+CONFIG_GENERIC_HWEIGHT=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_ARCH_MAY_HAVE_PC_FDC=y
 CONFIG_DMI=y
 CONFIG_DMI=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
 
 #
 #
 # Code maturity level options
 # Code maturity level options
 #
 #
 CONFIG_EXPERIMENTAL=y
 CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
+CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_INIT_ENV_ARG_LIMIT=32
 
 
 #
 #
 # General setup
 # General setup
 #
 #
 CONFIG_LOCALVERSION=""
 CONFIG_LOCALVERSION=""
-# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_LOCALVERSION_AUTO=y
 CONFIG_SWAP=y
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
+CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 # CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
+# CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
 # CONFIG_AUDIT is not set
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_IKCONFIG_PROC=y
+# CONFIG_CPUSETS is not set
+# CONFIG_RELAY is not set
 CONFIG_INITRAMFS_SOURCE=""
 CONFIG_INITRAMFS_SOURCE=""
-CONFIG_UID16=y
-CONFIG_VM86=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_EMBEDDED is not set
 # CONFIG_EMBEDDED is not set
+CONFIG_UID16=y
+CONFIG_SYSCTL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 # CONFIG_KALLSYMS_EXTRA_PASS is not set
 CONFIG_HOTPLUG=y
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_PRINTK=y
@@ -45,11 +55,9 @@ CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
 CONFIG_FUTEX=y
 CONFIG_EPOLL=y
 CONFIG_EPOLL=y
 CONFIG_SHMEM=y
 CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
 CONFIG_SLAB=y
 CONFIG_SLAB=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_BASE_SMALL=0
 # CONFIG_SLOB is not set
 # CONFIG_SLOB is not set
@@ -60,41 +68,45 @@ CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_OBSOLETE_MODPARM=y
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODVERSIONS is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 # CONFIG_KMOD is not set
 # CONFIG_KMOD is not set
+CONFIG_STOP_MACHINE=y
 
 
 #
 #
 # Block layer
 # Block layer
 #
 #
-# CONFIG_LBD is not set
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_LSF is not set
 
 
 #
 #
 # IO Schedulers
 # IO Schedulers
 #
 #
 CONFIG_IOSCHED_NOOP=y
 CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
 CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_AS=y
 # CONFIG_DEFAULT_DEADLINE is not set
 # CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
+# CONFIG_DEFAULT_CFQ is not set
 # CONFIG_DEFAULT_NOOP is not set
 # CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
+CONFIG_DEFAULT_IOSCHED="anticipatory"
 
 
 #
 #
 # Processor type and features
 # Processor type and features
 #
 #
-CONFIG_X86_PC=y
+CONFIG_SMP=y
+# CONFIG_X86_PC is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
 # CONFIG_X86_NUMAQ is not set
 # CONFIG_X86_SUMMIT is not set
 # CONFIG_X86_SUMMIT is not set
 # CONFIG_X86_BIGSMP is not set
 # CONFIG_X86_BIGSMP is not set
 # CONFIG_X86_VISWS is not set
 # CONFIG_X86_VISWS is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_GENERICARCH=y
 # CONFIG_X86_ES7000 is not set
 # CONFIG_X86_ES7000 is not set
+CONFIG_X86_CYCLONE_TIMER=y
 # CONFIG_M386 is not set
 # CONFIG_M386 is not set
 # CONFIG_M486 is not set
 # CONFIG_M486 is not set
 # CONFIG_M586 is not set
 # CONFIG_M586 is not set
@@ -102,11 +114,11 @@ CONFIG_X86_PC=y
 # CONFIG_M586MMX is not set
 # CONFIG_M586MMX is not set
 # CONFIG_M686 is not set
 # CONFIG_M686 is not set
 # CONFIG_MPENTIUMII is not set
 # CONFIG_MPENTIUMII is not set
-# CONFIG_MPENTIUMIII is not set
+CONFIG_MPENTIUMIII=y
 # CONFIG_MPENTIUMM is not set
 # CONFIG_MPENTIUMM is not set
 # CONFIG_MPENTIUM4 is not set
 # CONFIG_MPENTIUM4 is not set
 # CONFIG_MK6 is not set
 # CONFIG_MK6 is not set
-CONFIG_MK7=y
+# CONFIG_MK7 is not set
 # CONFIG_MK8 is not set
 # CONFIG_MK8 is not set
 # CONFIG_MCRUSOE is not set
 # CONFIG_MCRUSOE is not set
 # CONFIG_MEFFICEON is not set
 # CONFIG_MEFFICEON is not set
@@ -117,10 +129,10 @@ CONFIG_MK7=y
 # CONFIG_MGEODE_LX is not set
 # CONFIG_MGEODE_LX is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MCYRIXIII is not set
 # CONFIG_MVIAC3_2 is not set
 # CONFIG_MVIAC3_2 is not set
-# CONFIG_X86_GENERIC is not set
+CONFIG_X86_GENERIC=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_CMPXCHG=y
 CONFIG_X86_XADD=y
 CONFIG_X86_XADD=y
-CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_L1_CACHE_SHIFT=7
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_WP_WORKS_OK=y
@@ -131,26 +143,28 @@ CONFIG_X86_CMPXCHG64=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_GOOD_APIC=y
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_INTEL_USERCOPY=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
 CONFIG_X86_USE_PPRO_CHECKSUM=y
-CONFIG_X86_USE_3DNOW=y
 CONFIG_X86_TSC=y
 CONFIG_X86_TSC=y
-# CONFIG_HPET_TIMER is not set
-# CONFIG_SMP is not set
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
+CONFIG_HPET_TIMER=y
+CONFIG_HPET_EMULATE_RTC=y
+CONFIG_NR_CPUS=32
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+# CONFIG_PREEMPT_NONE is not set
+CONFIG_PREEMPT_VOLUNTARY=y
 # CONFIG_PREEMPT is not set
 # CONFIG_PREEMPT is not set
-CONFIG_X86_UP_APIC=y
-CONFIG_X86_UP_IOAPIC=y
+CONFIG_PREEMPT_BKL=y
 CONFIG_X86_LOCAL_APIC=y
 CONFIG_X86_LOCAL_APIC=y
 CONFIG_X86_IO_APIC=y
 CONFIG_X86_IO_APIC=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_NONFATAL=y
 CONFIG_X86_MCE_NONFATAL=y
-# CONFIG_X86_MCE_P4THERMAL is not set
+CONFIG_X86_MCE_P4THERMAL=y
+CONFIG_VM86=y
 # CONFIG_TOSHIBA is not set
 # CONFIG_TOSHIBA is not set
 # CONFIG_I8K is not set
 # CONFIG_I8K is not set
 # CONFIG_X86_REBOOTFIXUPS is not set
 # CONFIG_X86_REBOOTFIXUPS is not set
-# CONFIG_MICROCODE is not set
-# CONFIG_X86_MSR is not set
-# CONFIG_X86_CPUID is not set
+CONFIG_MICROCODE=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
 
 
 #
 #
 # Firmware Drivers
 # Firmware Drivers
@@ -158,68 +172,67 @@ CONFIG_X86_MCE_NONFATAL=y
 # CONFIG_EDD is not set
 # CONFIG_EDD is not set
 # CONFIG_DELL_RBU is not set
 # CONFIG_DELL_RBU is not set
 # CONFIG_DCDBAS is not set
 # CONFIG_DCDBAS is not set
-CONFIG_NOHIGHMEM=y
-# CONFIG_HIGHMEM4G is not set
+# CONFIG_NOHIGHMEM is not set
+CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
 # CONFIG_HIGHMEM64G is not set
-CONFIG_VMSPLIT_3G=y
-# CONFIG_VMSPLIT_3G_OPT is not set
-# CONFIG_VMSPLIT_2G is not set
-# CONFIG_VMSPLIT_1G is not set
 CONFIG_PAGE_OFFSET=0xC0000000
 CONFIG_PAGE_OFFSET=0xC0000000
-CONFIG_ARCH_FLATMEM_ENABLE=y
-CONFIG_ARCH_SPARSEMEM_ENABLE=y
-CONFIG_ARCH_SELECT_MEMORY_MODEL=y
+CONFIG_HIGHMEM=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_SELECT_MEMORY_MODEL=y
 CONFIG_FLATMEM_MANUAL=y
 CONFIG_FLATMEM_MANUAL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 # CONFIG_SPARSEMEM_MANUAL is not set
 CONFIG_FLATMEM=y
 CONFIG_FLATMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_FLAT_NODE_MEM_MAP=y
-CONFIG_SPARSEMEM_STATIC=y
+# CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_RESOURCES_64BIT=y
+# CONFIG_HIGHPTE is not set
 # CONFIG_MATH_EMULATION is not set
 # CONFIG_MATH_EMULATION is not set
 CONFIG_MTRR=y
 CONFIG_MTRR=y
 # CONFIG_EFI is not set
 # CONFIG_EFI is not set
+# CONFIG_IRQBALANCE is not set
 CONFIG_REGPARM=y
 CONFIG_REGPARM=y
-# CONFIG_SECCOMP is not set
-CONFIG_HZ_100=y
-# CONFIG_HZ_250 is not set
+CONFIG_SECCOMP=y
+# CONFIG_HZ_100 is not set
+CONFIG_HZ_250=y
 # CONFIG_HZ_1000 is not set
 # CONFIG_HZ_1000 is not set
-CONFIG_HZ=100
+CONFIG_HZ=250
 # CONFIG_KEXEC is not set
 # CONFIG_KEXEC is not set
+# CONFIG_CRASH_DUMP is not set
 CONFIG_PHYSICAL_START=0x100000
 CONFIG_PHYSICAL_START=0x100000
-CONFIG_DOUBLEFAULT=y
+# CONFIG_HOTPLUG_CPU is not set
+CONFIG_COMPAT_VDSO=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 
 
 #
 #
 # Power management options (ACPI, APM)
 # Power management options (ACPI, APM)
 #
 #
 CONFIG_PM=y
 CONFIG_PM=y
-# CONFIG_PM_LEGACY is not set
+CONFIG_PM_LEGACY=y
 # CONFIG_PM_DEBUG is not set
 # CONFIG_PM_DEBUG is not set
-CONFIG_SOFTWARE_SUSPEND=y
-CONFIG_PM_STD_PARTITION=""
 
 
 #
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 #
 CONFIG_ACPI=y
 CONFIG_ACPI=y
-# CONFIG_ACPI_SLEEP is not set
-# CONFIG_ACPI_AC is not set
-# CONFIG_ACPI_BATTERY is not set
-# CONFIG_ACPI_BUTTON is not set
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_BATTERY=y
+CONFIG_ACPI_BUTTON=y
 # CONFIG_ACPI_VIDEO is not set
 # CONFIG_ACPI_VIDEO is not set
 # CONFIG_ACPI_HOTKEY is not set
 # CONFIG_ACPI_HOTKEY is not set
-# CONFIG_ACPI_FAN is not set
-# CONFIG_ACPI_PROCESSOR is not set
+CONFIG_ACPI_FAN=y
+# CONFIG_ACPI_DOCK is not set
+CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_THERMAL=y
 # CONFIG_ACPI_ASUS is not set
 # CONFIG_ACPI_ASUS is not set
 # CONFIG_ACPI_IBM is not set
 # CONFIG_ACPI_IBM is not set
 # CONFIG_ACPI_TOSHIBA is not set
 # CONFIG_ACPI_TOSHIBA is not set
-CONFIG_ACPI_BLACKLIST_YEAR=0
-# CONFIG_ACPI_DEBUG is not set
+CONFIG_ACPI_BLACKLIST_YEAR=2001
+CONFIG_ACPI_DEBUG=y
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
 CONFIG_ACPI_SYSTEM=y
-# CONFIG_X86_PM_TIMER is not set
+CONFIG_X86_PM_TIMER=y
 # CONFIG_ACPI_CONTAINER is not set
 # CONFIG_ACPI_CONTAINER is not set
 
 
 #
 #
@@ -230,7 +243,41 @@ CONFIG_ACPI_SYSTEM=y
 #
 #
 # CPU Frequency scaling
 # CPU Frequency scaling
 #
 #
-# CONFIG_CPU_FREQ is not set
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_TABLE=y
+CONFIG_CPU_FREQ_DEBUG=y
+CONFIG_CPU_FREQ_STAT=y
+# CONFIG_CPU_FREQ_STAT_DETAILS is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
+# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+
+#
+# CPUFreq processor drivers
+#
+CONFIG_X86_ACPI_CPUFREQ=y
+# CONFIG_X86_POWERNOW_K6 is not set
+# CONFIG_X86_POWERNOW_K7 is not set
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_X86_POWERNOW_K8_ACPI=y
+# CONFIG_X86_GX_SUSPMOD is not set
+# CONFIG_X86_SPEEDSTEP_CENTRINO is not set
+# CONFIG_X86_SPEEDSTEP_ICH is not set
+# CONFIG_X86_SPEEDSTEP_SMI is not set
+# CONFIG_X86_P4_CLOCKMOD is not set
+# CONFIG_X86_CPUFREQ_NFORCE2 is not set
+# CONFIG_X86_LONGRUN is not set
+# CONFIG_X86_LONGHAUL is not set
+
+#
+# shared options
+#
+CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y
+# CONFIG_X86_SPEEDSTEP_LIB is not set
 
 
 #
 #
 # Bus options (PCI, PCMCIA, EISA, MCA, ISA)
 # Bus options (PCI, PCMCIA, EISA, MCA, ISA)
@@ -244,12 +291,13 @@ CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_MMCONFIG=y
 # CONFIG_PCIEPORTBUS is not set
 # CONFIG_PCIEPORTBUS is not set
-# CONFIG_PCI_MSI is not set
-# CONFIG_PCI_LEGACY_PROC is not set
+CONFIG_PCI_MSI=y
+# CONFIG_PCI_DEBUG is not set
 CONFIG_ISA_DMA_API=y
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
 # CONFIG_MCA is not set
 # CONFIG_SCx200 is not set
 # CONFIG_SCx200 is not set
+CONFIG_K8_NB=y
 
 
 #
 #
 # PCCARD (PCMCIA/CardBus) support
 # PCCARD (PCMCIA/CardBus) support
@@ -278,93 +326,54 @@ CONFIG_NET=y
 #
 #
 # CONFIG_NETDEBUG is not set
 # CONFIG_NETDEBUG is not set
 CONFIG_PACKET=y
 CONFIG_PACKET=y
-CONFIG_PACKET_MMAP=y
+# CONFIG_PACKET_MMAP is not set
 CONFIG_UNIX=y
 CONFIG_UNIX=y
+CONFIG_XFRM=y
+# CONFIG_XFRM_USER is not set
+# CONFIG_XFRM_SUB_POLICY is not set
 # CONFIG_NET_KEY is not set
 # CONFIG_NET_KEY is not set
 CONFIG_INET=y
 CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
+CONFIG_IP_MULTICAST=y
 # CONFIG_IP_ADVANCED_ROUTER is not set
 # CONFIG_IP_ADVANCED_ROUTER is not set
 CONFIG_IP_FIB_HASH=y
 CONFIG_IP_FIB_HASH=y
-# CONFIG_IP_PNP is not set
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+# CONFIG_IP_PNP_BOOTP is not set
+# CONFIG_IP_PNP_RARP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPIP is not set
 # CONFIG_NET_IPGRE is not set
 # CONFIG_NET_IPGRE is not set
+# CONFIG_IP_MROUTE is not set
 # CONFIG_ARPD is not set
 # CONFIG_ARPD is not set
 # CONFIG_SYN_COOKIES is not set
 # CONFIG_SYN_COOKIES is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_AH is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_ESP is not set
 # CONFIG_INET_IPCOMP is not set
 # CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
 # CONFIG_INET_TUNNEL is not set
-# CONFIG_INET_DIAG is not set
+CONFIG_INET_XFRM_MODE_TRANSPORT=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
 # CONFIG_TCP_CONG_ADVANCED is not set
 # CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_BIC=y
-
-#
-# IP: Virtual Server Configuration
-#
-# CONFIG_IP_VS is not set
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_DEBUG is not set
-
-#
-# Core Netfilter Configuration
-#
-# CONFIG_NETFILTER_NETLINK is not set
-CONFIG_NETFILTER_XTABLES=y
-# CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set
-# CONFIG_NETFILTER_XT_TARGET_MARK is not set
-# CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set
-# CONFIG_NETFILTER_XT_MATCH_COMMENT is not set
-# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
-# CONFIG_NETFILTER_XT_MATCH_DCCP is not set
-# CONFIG_NETFILTER_XT_MATCH_HELPER is not set
-# CONFIG_NETFILTER_XT_MATCH_LENGTH is not set
-CONFIG_NETFILTER_XT_MATCH_LIMIT=y
-CONFIG_NETFILTER_XT_MATCH_MAC=y
-# CONFIG_NETFILTER_XT_MATCH_MARK is not set
-# CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set
-# CONFIG_NETFILTER_XT_MATCH_REALM is not set
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_NETFILTER_XT_MATCH_STATE=y
-# CONFIG_NETFILTER_XT_MATCH_STRING is not set
-# CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set
-
-#
-# IP: Netfilter Configuration
-#
-CONFIG_IP_NF_CONNTRACK=y
-# CONFIG_IP_NF_CT_ACCT is not set
-# CONFIG_IP_NF_CONNTRACK_MARK is not set
-# CONFIG_IP_NF_CONNTRACK_EVENTS is not set
-# CONFIG_IP_NF_CT_PROTO_SCTP is not set
-CONFIG_IP_NF_FTP=y
-# CONFIG_IP_NF_IRC is not set
-# CONFIG_IP_NF_NETBIOS_NS is not set
-# CONFIG_IP_NF_TFTP is not set
-# CONFIG_IP_NF_AMANDA is not set
-# CONFIG_IP_NF_PPTP is not set
-# CONFIG_IP_NF_QUEUE is not set
-CONFIG_IP_NF_IPTABLES=y
-# CONFIG_IP_NF_MATCH_IPRANGE is not set
-# CONFIG_IP_NF_MATCH_MULTIPORT is not set
-# CONFIG_IP_NF_MATCH_TOS is not set
-# CONFIG_IP_NF_MATCH_RECENT is not set
-# CONFIG_IP_NF_MATCH_ECN is not set
-# CONFIG_IP_NF_MATCH_DSCP is not set
-# CONFIG_IP_NF_MATCH_AH_ESP is not set
-# CONFIG_IP_NF_MATCH_TTL is not set
-# CONFIG_IP_NF_MATCH_OWNER is not set
-# CONFIG_IP_NF_MATCH_ADDRTYPE is not set
-# CONFIG_IP_NF_MATCH_HASHLIMIT is not set
-CONFIG_IP_NF_FILTER=y
-# CONFIG_IP_NF_TARGET_REJECT is not set
-CONFIG_IP_NF_TARGET_LOG=y
-# CONFIG_IP_NF_TARGET_ULOG is not set
-# CONFIG_IP_NF_TARGET_TCPMSS is not set
-# CONFIG_IP_NF_NAT is not set
-# CONFIG_IP_NF_MANGLE is not set
-# CONFIG_IP_NF_RAW is not set
-# CONFIG_IP_NF_ARPTABLES is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+CONFIG_INET6_XFRM_MODE_TRANSPORT=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_SUBTREES is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
 
 
 #
 #
 # DCCP Configuration (EXPERIMENTAL)
 # DCCP Configuration (EXPERIMENTAL)
@@ -389,7 +398,6 @@ CONFIG_IP_NF_TARGET_LOG=y
 # CONFIG_ATALK is not set
 # CONFIG_ATALK is not set
 # CONFIG_X25 is not set
 # CONFIG_X25 is not set
 # CONFIG_LAPB is not set
 # CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
 # CONFIG_ECONET is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
 # CONFIG_WAN_ROUTER is not set
 
 
@@ -402,6 +410,7 @@ CONFIG_IP_NF_TARGET_LOG=y
 # Network testing
 # Network testing
 #
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_PKTGEN is not set
+# CONFIG_NET_TCPPROBE is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_BT is not set
@@ -416,7 +425,9 @@ CONFIG_IP_NF_TARGET_LOG=y
 #
 #
 CONFIG_STANDALONE=y
 CONFIG_STANDALONE=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
 CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
+CONFIG_FW_LOADER=y
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_SYS_HYPERVISOR is not set
 
 
 #
 #
 # Connector - unified userspace <-> kernelspace linker
 # Connector - unified userspace <-> kernelspace linker
@@ -431,13 +442,7 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 #
 #
 # Parallel port support
 # Parallel port support
 #
 #
-CONFIG_PARPORT=y
-CONFIG_PARPORT_PC=y
-# CONFIG_PARPORT_SERIAL is not set
-# CONFIG_PARPORT_PC_FIFO is not set
-# CONFIG_PARPORT_PC_SUPERIO is not set
-# CONFIG_PARPORT_GSC is not set
-CONFIG_PARPORT_1284=y
+# CONFIG_PARPORT is not set
 
 
 #
 #
 # Plug and Play support
 # Plug and Play support
@@ -447,8 +452,7 @@ CONFIG_PARPORT_1284=y
 #
 #
 # Block devices
 # Block devices
 #
 #
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_PARIDE is not set
+CONFIG_BLK_DEV_FD=y
 # CONFIG_BLK_CPQ_DA is not set
 # CONFIG_BLK_CPQ_DA is not set
 # CONFIG_BLK_CPQ_CISS_DA is not set
 # CONFIG_BLK_CPQ_CISS_DA is not set
 # CONFIG_BLK_DEV_DAC960 is not set
 # CONFIG_BLK_DEV_DAC960 is not set
@@ -459,8 +463,11 @@ CONFIG_BLK_DEV_LOOP=y
 # CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_NBD is not set
 # CONFIG_BLK_DEV_SX8 is not set
 # CONFIG_BLK_DEV_SX8 is not set
 # CONFIG_BLK_DEV_UB is not set
 # CONFIG_BLK_DEV_UB is not set
-# CONFIG_BLK_DEV_RAM is not set
+CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=16
 CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
+CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
 # CONFIG_ATA_OVER_ETH is not set
 
 
@@ -476,7 +483,7 @@ CONFIG_BLK_DEV_IDE=y
 # CONFIG_BLK_DEV_IDE_SATA is not set
 # CONFIG_BLK_DEV_IDE_SATA is not set
 # CONFIG_BLK_DEV_HD_IDE is not set
 # CONFIG_BLK_DEV_HD_IDE is not set
 CONFIG_BLK_DEV_IDEDISK=y
 CONFIG_BLK_DEV_IDEDISK=y
-# CONFIG_IDEDISK_MULTI_MODE is not set
+CONFIG_IDEDISK_MULTI_MODE=y
 CONFIG_BLK_DEV_IDECD=y
 CONFIG_BLK_DEV_IDECD=y
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDETAPE is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
 # CONFIG_BLK_DEV_IDEFLOPPY is not set
@@ -486,10 +493,10 @@ CONFIG_BLK_DEV_IDECD=y
 #
 #
 # IDE chipset support/bugfixes
 # IDE chipset support/bugfixes
 #
 #
-# CONFIG_IDE_GENERIC is not set
+CONFIG_IDE_GENERIC=y
 # CONFIG_BLK_DEV_CMD640 is not set
 # CONFIG_BLK_DEV_CMD640 is not set
 CONFIG_BLK_DEV_IDEPCI=y
 CONFIG_BLK_DEV_IDEPCI=y
-CONFIG_IDEPCI_SHARE_IRQ=y
+# CONFIG_IDEPCI_SHARE_IRQ is not set
 # CONFIG_BLK_DEV_OFFBOARD is not set
 # CONFIG_BLK_DEV_OFFBOARD is not set
 # CONFIG_BLK_DEV_GENERIC is not set
 # CONFIG_BLK_DEV_GENERIC is not set
 # CONFIG_BLK_DEV_OPTI621 is not set
 # CONFIG_BLK_DEV_OPTI621 is not set
@@ -500,7 +507,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_IDEDMA_ONLYDISK is not set
 # CONFIG_IDEDMA_ONLYDISK is not set
 # CONFIG_BLK_DEV_AEC62XX is not set
 # CONFIG_BLK_DEV_AEC62XX is not set
 # CONFIG_BLK_DEV_ALI15X3 is not set
 # CONFIG_BLK_DEV_ALI15X3 is not set
-# CONFIG_BLK_DEV_AMD74XX is not set
+CONFIG_BLK_DEV_AMD74XX=y
 # CONFIG_BLK_DEV_ATIIXP is not set
 # CONFIG_BLK_DEV_ATIIXP is not set
 # CONFIG_BLK_DEV_CMD64X is not set
 # CONFIG_BLK_DEV_CMD64X is not set
 # CONFIG_BLK_DEV_TRIFLEX is not set
 # CONFIG_BLK_DEV_TRIFLEX is not set
@@ -511,7 +518,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_BLK_DEV_HPT34X is not set
 # CONFIG_BLK_DEV_HPT34X is not set
 # CONFIG_BLK_DEV_HPT366 is not set
 # CONFIG_BLK_DEV_HPT366 is not set
 # CONFIG_BLK_DEV_SC1200 is not set
 # CONFIG_BLK_DEV_SC1200 is not set
-# CONFIG_BLK_DEV_PIIX is not set
+CONFIG_BLK_DEV_PIIX=y
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_IT821X is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_NS87415 is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
 # CONFIG_BLK_DEV_PDC202XX_OLD is not set
@@ -521,7 +528,7 @@ CONFIG_IDEDMA_PCI_AUTO=y
 # CONFIG_BLK_DEV_SIS5513 is not set
 # CONFIG_BLK_DEV_SIS5513 is not set
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_SLC90E66 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
 # CONFIG_BLK_DEV_TRM290 is not set
-CONFIG_BLK_DEV_VIA82CXXX=y
+# CONFIG_BLK_DEV_VIA82CXXX is not set
 # CONFIG_IDE_ARM is not set
 # CONFIG_IDE_ARM is not set
 CONFIG_BLK_DEV_IDEDMA=y
 CONFIG_BLK_DEV_IDEDMA=y
 # CONFIG_IDEDMA_IVB is not set
 # CONFIG_IDEDMA_IVB is not set
@@ -533,6 +540,7 @@ CONFIG_IDEDMA_AUTO=y
 #
 #
 # CONFIG_RAID_ATTRS is not set
 # CONFIG_RAID_ATTRS is not set
 CONFIG_SCSI=y
 CONFIG_SCSI=y
+CONFIG_SCSI_NETLINK=y
 # CONFIG_SCSI_PROC_FS is not set
 # CONFIG_SCSI_PROC_FS is not set
 
 
 #
 #
@@ -541,8 +549,9 @@ CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_CHR_DEV_ST is not set
 # CONFIG_CHR_DEV_ST is not set
 # CONFIG_CHR_DEV_OSST is not set
 # CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
+CONFIG_BLK_DEV_SR=y
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=y
 # CONFIG_CHR_DEV_SCH is not set
 # CONFIG_CHR_DEV_SCH is not set
 
 
 #
 #
@@ -553,29 +562,44 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOGGING is not set
 # CONFIG_SCSI_LOGGING is not set
 
 
 #
 #
-# SCSI Transport Attributes
+# SCSI Transports
 #
 #
-# CONFIG_SCSI_SPI_ATTRS is not set
-# CONFIG_SCSI_FC_ATTRS is not set
+CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_SCSI_FC_ATTRS=y
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
 # CONFIG_SCSI_SAS_ATTRS is not set
 # CONFIG_SCSI_SAS_ATTRS is not set
+# CONFIG_SCSI_SAS_LIBSAS is not set
 
 
 #
 #
 # SCSI low-level drivers
 # SCSI low-level drivers
 #
 #
 # CONFIG_ISCSI_TCP is not set
 # CONFIG_ISCSI_TCP is not set
-# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
+CONFIG_BLK_DEV_3W_XXXX_RAID=y
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_3W_9XXX is not set
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_ACARD is not set
 # CONFIG_SCSI_AACRAID is not set
 # CONFIG_SCSI_AACRAID is not set
-# CONFIG_SCSI_AIC7XXX is not set
+CONFIG_SCSI_AIC7XXX=y
+CONFIG_AIC7XXX_CMDS_PER_DEVICE=32
+CONFIG_AIC7XXX_RESET_DELAY_MS=5000
+CONFIG_AIC7XXX_DEBUG_ENABLE=y
+CONFIG_AIC7XXX_DEBUG_MASK=0
+CONFIG_AIC7XXX_REG_PRETTY_PRINT=y
 # CONFIG_SCSI_AIC7XXX_OLD is not set
 # CONFIG_SCSI_AIC7XXX_OLD is not set
-# CONFIG_SCSI_AIC79XX is not set
+CONFIG_SCSI_AIC79XX=y
+CONFIG_AIC79XX_CMDS_PER_DEVICE=32
+CONFIG_AIC79XX_RESET_DELAY_MS=4000
+# CONFIG_AIC79XX_ENABLE_RD_STRM is not set
+# CONFIG_AIC79XX_DEBUG_ENABLE is not set
+CONFIG_AIC79XX_DEBUG_MASK=0
+# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
+# CONFIG_SCSI_AIC94XX is not set
 # CONFIG_SCSI_DPT_I2O is not set
 # CONFIG_SCSI_DPT_I2O is not set
+# CONFIG_SCSI_ADVANSYS is not set
+# CONFIG_SCSI_ARCMSR is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_LEGACY is not set
 # CONFIG_MEGARAID_SAS is not set
 # CONFIG_MEGARAID_SAS is not set
-# CONFIG_SCSI_SATA is not set
+# CONFIG_SCSI_HPTIOP is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
 # CONFIG_SCSI_EATA is not set
@@ -584,11 +608,9 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_IPS is not set
 # CONFIG_SCSI_IPS is not set
 # CONFIG_SCSI_INITIO is not set
 # CONFIG_SCSI_INITIO is not set
 # CONFIG_SCSI_INIA100 is not set
 # CONFIG_SCSI_INIA100 is not set
-# CONFIG_SCSI_PPA is not set
-# CONFIG_SCSI_IMM is not set
+# CONFIG_SCSI_STEX is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_SYM53C8XX_2 is not set
 # CONFIG_SCSI_IPR is not set
 # CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_FC is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 # CONFIG_SCSI_QLOGIC_1280 is not set
 # CONFIG_SCSI_QLA_FC is not set
 # CONFIG_SCSI_QLA_FC is not set
 # CONFIG_SCSI_LPFC is not set
 # CONFIG_SCSI_LPFC is not set
@@ -597,23 +619,115 @@ CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_NSP32 is not set
 # CONFIG_SCSI_NSP32 is not set
 # CONFIG_SCSI_DEBUG is not set
 # CONFIG_SCSI_DEBUG is not set
 
 
+#
+# Serial ATA (prod) and Parallel ATA (experimental) drivers
+#
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SVW=y
+CONFIG_ATA_PIIX=y
+# CONFIG_SATA_MV is not set
+CONFIG_SATA_NV=y
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+CONFIG_SATA_SIL=y
+# CONFIG_SATA_SIL24 is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+CONFIG_SATA_VIA=y
+# CONFIG_SATA_VITESSE is not set
+CONFIG_SATA_INTEL_COMBINED=y
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CS5535 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_LEGACY is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_QDI is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+
 #
 #
 # Multi-device support (RAID and LVM)
 # Multi-device support (RAID and LVM)
 #
 #
-# CONFIG_MD is not set
+CONFIG_MD=y
+# CONFIG_BLK_DEV_MD is not set
+CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_CRYPT is not set
+# CONFIG_DM_SNAPSHOT is not set
+# CONFIG_DM_MIRROR is not set
+# CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
 
 
 #
 #
 # Fusion MPT device support
 # Fusion MPT device support
 #
 #
-# CONFIG_FUSION is not set
-# CONFIG_FUSION_SPI is not set
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
 # CONFIG_FUSION_FC is not set
 # CONFIG_FUSION_FC is not set
 # CONFIG_FUSION_SAS is not set
 # CONFIG_FUSION_SAS is not set
+CONFIG_FUSION_MAX_SGE=128
+# CONFIG_FUSION_CTL is not set
 
 
 #
 #
 # IEEE 1394 (FireWire) support
 # IEEE 1394 (FireWire) support
 #
 #
-# CONFIG_IEEE1394 is not set
+CONFIG_IEEE1394=y
+
+#
+# Subsystem Options
+#
+# CONFIG_IEEE1394_VERBOSEDEBUG is not set
+# CONFIG_IEEE1394_OUI_DB is not set
+# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
+# CONFIG_IEEE1394_EXPORT_FULL_API is not set
+
+#
+# Device Drivers
+#
+
+#
+# Texas Instruments PCILynx requires I2C
+#
+CONFIG_IEEE1394_OHCI1394=y
+
+#
+# Protocol Drivers
+#
+# CONFIG_IEEE1394_VIDEO1394 is not set
+# CONFIG_IEEE1394_SBP2 is not set
+# CONFIG_IEEE1394_ETH1394 is not set
+# CONFIG_IEEE1394_DV1394 is not set
+CONFIG_IEEE1394_RAWIO=y
 
 
 #
 #
 # I2O device support
 # I2O device support
@@ -652,46 +766,63 @@ CONFIG_MII=y
 #
 #
 # Tulip family network device support
 # Tulip family network device support
 #
 #
-# CONFIG_NET_TULIP is not set
+CONFIG_NET_TULIP=y
+# CONFIG_DE2104X is not set
+CONFIG_TULIP=y
+# CONFIG_TULIP_MWI is not set
+# CONFIG_TULIP_MMIO is not set
+# CONFIG_TULIP_NAPI is not set
+# CONFIG_DE4X5 is not set
+# CONFIG_WINBOND_840 is not set
+# CONFIG_DM9102 is not set
+# CONFIG_ULI526X is not set
 # CONFIG_HP100 is not set
 # CONFIG_HP100 is not set
 CONFIG_NET_PCI=y
 CONFIG_NET_PCI=y
 # CONFIG_PCNET32 is not set
 # CONFIG_PCNET32 is not set
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_AMD8111_ETH is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
 # CONFIG_ADAPTEC_STARFIRE is not set
-# CONFIG_B44 is not set
-# CONFIG_FORCEDETH is not set
+CONFIG_B44=y
+CONFIG_FORCEDETH=y
+# CONFIG_FORCEDETH_NAPI is not set
 # CONFIG_DGRS is not set
 # CONFIG_DGRS is not set
 # CONFIG_EEPRO100 is not set
 # CONFIG_EEPRO100 is not set
 CONFIG_E100=y
 CONFIG_E100=y
 # CONFIG_FEALNX is not set
 # CONFIG_FEALNX is not set
 # CONFIG_NATSEMI is not set
 # CONFIG_NATSEMI is not set
 # CONFIG_NE2K_PCI is not set
 # CONFIG_NE2K_PCI is not set
-# CONFIG_8139CP is not set
-# CONFIG_8139TOO is not set
+CONFIG_8139CP=y
+CONFIG_8139TOO=y
+# CONFIG_8139TOO_PIO is not set
+# CONFIG_8139TOO_TUNE_TWISTER is not set
+# CONFIG_8139TOO_8129 is not set
+# CONFIG_8139_OLD_RX_RESET is not set
 # CONFIG_SIS900 is not set
 # CONFIG_SIS900 is not set
 # CONFIG_EPIC100 is not set
 # CONFIG_EPIC100 is not set
 # CONFIG_SUNDANCE is not set
 # CONFIG_SUNDANCE is not set
 # CONFIG_TLAN is not set
 # CONFIG_TLAN is not set
 # CONFIG_VIA_RHINE is not set
 # CONFIG_VIA_RHINE is not set
-# CONFIG_NET_POCKET is not set
 
 
 #
 #
 # Ethernet (1000 Mbit)
 # Ethernet (1000 Mbit)
 #
 #
 # CONFIG_ACENIC is not set
 # CONFIG_ACENIC is not set
 # CONFIG_DL2K is not set
 # CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
+CONFIG_E1000=y
+# CONFIG_E1000_NAPI is not set
+# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set
 # CONFIG_NS83820 is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
 # CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
+CONFIG_R8169=y
+# CONFIG_R8169_NAPI is not set
 # CONFIG_SIS190 is not set
 # CONFIG_SIS190 is not set
 # CONFIG_SKGE is not set
 # CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
+CONFIG_SKY2=y
 # CONFIG_SK98LIN is not set
 # CONFIG_SK98LIN is not set
 # CONFIG_VIA_VELOCITY is not set
 # CONFIG_VIA_VELOCITY is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
+CONFIG_TIGON3=y
+CONFIG_BNX2=y
+# CONFIG_QLA3XXX is not set
 
 
 #
 #
 # Ethernet (10000 Mbit)
 # Ethernet (10000 Mbit)
@@ -699,6 +830,7 @@ CONFIG_E100=y
 # CONFIG_CHELSIO_T1 is not set
 # CONFIG_CHELSIO_T1 is not set
 # CONFIG_IXGB is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
 # CONFIG_S2IO is not set
+# CONFIG_MYRI10GE is not set
 
 
 #
 #
 # Token Ring devices
 # Token Ring devices
@@ -716,14 +848,15 @@ CONFIG_E100=y
 # CONFIG_WAN is not set
 # CONFIG_WAN is not set
 # CONFIG_FDDI is not set
 # CONFIG_FDDI is not set
 # CONFIG_HIPPI is not set
 # CONFIG_HIPPI is not set
-# CONFIG_PLIP is not set
 # CONFIG_PPP is not set
 # CONFIG_PPP is not set
 # CONFIG_SLIP is not set
 # CONFIG_SLIP is not set
 # CONFIG_NET_FC is not set
 # CONFIG_NET_FC is not set
 # CONFIG_SHAPER is not set
 # CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
+CONFIG_NETCONSOLE=y
+CONFIG_NETPOLL=y
+# CONFIG_NETPOLL_RX is not set
+# CONFIG_NETPOLL_TRAP is not set
+CONFIG_NET_POLL_CONTROLLER=y
 
 
 #
 #
 # ISDN subsystem
 # ISDN subsystem
@@ -745,8 +878,8 @@ CONFIG_INPUT=y
 #
 #
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_MOUSEDEV_PSAUX=y
 CONFIG_INPUT_MOUSEDEV_PSAUX=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
+CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
 # CONFIG_INPUT_JOYDEV is not set
 # CONFIG_INPUT_JOYDEV is not set
 # CONFIG_INPUT_TSDEV is not set
 # CONFIG_INPUT_TSDEV is not set
 CONFIG_INPUT_EVDEV=y
 CONFIG_INPUT_EVDEV=y
@@ -776,7 +909,6 @@ CONFIG_SERIO=y
 CONFIG_SERIO_I8042=y
 CONFIG_SERIO_I8042=y
 # CONFIG_SERIO_SERPORT is not set
 # CONFIG_SERIO_SERPORT is not set
 # CONFIG_SERIO_CT82C710 is not set
 # CONFIG_SERIO_CT82C710 is not set
-# CONFIG_SERIO_PARKBD is not set
 # CONFIG_SERIO_PCIPS2 is not set
 # CONFIG_SERIO_PCIPS2 is not set
 CONFIG_SERIO_LIBPS2=y
 CONFIG_SERIO_LIBPS2=y
 # CONFIG_SERIO_RAW is not set
 # CONFIG_SERIO_RAW is not set
@@ -788,14 +920,15 @@ CONFIG_SERIO_LIBPS2=y
 CONFIG_VT=y
 CONFIG_VT=y
 CONFIG_VT_CONSOLE=y
 CONFIG_VT_CONSOLE=y
 CONFIG_HW_CONSOLE=y
 CONFIG_HW_CONSOLE=y
+# CONFIG_VT_HW_CONSOLE_BINDING is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
 # CONFIG_SERIAL_NONSTANDARD is not set
 
 
 #
 #
 # Serial drivers
 # Serial drivers
 #
 #
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250=y
-# CONFIG_SERIAL_8250_CONSOLE is not set
-# CONFIG_SERIAL_8250_ACPI is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_PCI=y
 CONFIG_SERIAL_8250_NR_UARTS=4
 CONFIG_SERIAL_8250_NR_UARTS=4
 CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 # CONFIG_SERIAL_8250_EXTENDED is not set
 # CONFIG_SERIAL_8250_EXTENDED is not set
@@ -804,14 +937,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4
 # Non-8250 serial port support
 # Non-8250 serial port support
 #
 #
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
 # CONFIG_SERIAL_JSM is not set
 # CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 CONFIG_UNIX98_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTYS=y
 CONFIG_LEGACY_PTY_COUNT=256
 CONFIG_LEGACY_PTY_COUNT=256
-CONFIG_PRINTER=y
-# CONFIG_LP_CONSOLE is not set
-# CONFIG_PPDEV is not set
-# CONFIG_TIPAR is not set
 
 
 #
 #
 # IPMI
 # IPMI
@@ -822,8 +952,12 @@ CONFIG_PRINTER=y
 # Watchdog Cards
 # Watchdog Cards
 #
 #
 # CONFIG_WATCHDOG is not set
 # CONFIG_WATCHDOG is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_NVRAM=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_INTEL=y
+CONFIG_HW_RANDOM_AMD=y
+CONFIG_HW_RANDOM_GEODE=y
+CONFIG_HW_RANDOM_VIA=y
+# CONFIG_NVRAM is not set
 CONFIG_RTC=y
 CONFIG_RTC=y
 # CONFIG_DTLK is not set
 # CONFIG_DTLK is not set
 # CONFIG_R3964 is not set
 # CONFIG_R3964 is not set
@@ -833,31 +967,28 @@ CONFIG_RTC=y
 #
 #
 # Ftape, the floppy tape device driver
 # Ftape, the floppy tape device driver
 #
 #
-# CONFIG_FTAPE is not set
 CONFIG_AGP=y
 CONFIG_AGP=y
 # CONFIG_AGP_ALI is not set
 # CONFIG_AGP_ALI is not set
 # CONFIG_AGP_ATI is not set
 # CONFIG_AGP_ATI is not set
 # CONFIG_AGP_AMD is not set
 # CONFIG_AGP_AMD is not set
-# CONFIG_AGP_AMD64 is not set
-# CONFIG_AGP_INTEL is not set
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
 # CONFIG_AGP_NVIDIA is not set
 # CONFIG_AGP_NVIDIA is not set
 # CONFIG_AGP_SIS is not set
 # CONFIG_AGP_SIS is not set
 # CONFIG_AGP_SWORKS is not set
 # CONFIG_AGP_SWORKS is not set
-CONFIG_AGP_VIA=y
+# CONFIG_AGP_VIA is not set
 # CONFIG_AGP_EFFICEON is not set
 # CONFIG_AGP_EFFICEON is not set
-CONFIG_DRM=y
-# CONFIG_DRM_TDFX is not set
-# CONFIG_DRM_R128 is not set
-CONFIG_DRM_RADEON=y
-# CONFIG_DRM_MGA is not set
-# CONFIG_DRM_SIS is not set
-# CONFIG_DRM_VIA is not set
-# CONFIG_DRM_SAVAGE is not set
+# CONFIG_DRM is not set
 # CONFIG_MWAVE is not set
 # CONFIG_MWAVE is not set
+# CONFIG_PC8736x_GPIO is not set
+# CONFIG_NSC_GPIO is not set
 # CONFIG_CS5535_GPIO is not set
 # CONFIG_CS5535_GPIO is not set
-# CONFIG_RAW_DRIVER is not set
-# CONFIG_HPET is not set
-# CONFIG_HANGCHECK_TIMER is not set
+CONFIG_RAW_DRIVER=y
+CONFIG_MAX_RAW_DEVS=256
+CONFIG_HPET=y
+# CONFIG_HPET_RTC_IRQ is not set
+CONFIG_HPET_MMAP=y
+CONFIG_HANGCHECK_TIMER=y
 
 
 #
 #
 # TPM devices
 # TPM devices
@@ -868,59 +999,7 @@ CONFIG_DRM_RADEON=y
 #
 #
 # I2C support
 # I2C support
 #
 #
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-
-#
-# I2C Algorithms
-#
-CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-CONFIG_I2C_ISA=y
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_PARPORT is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_SCx200_ACB is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_VIA is not set
-CONFIG_I2C_VIAPRO=y
-# CONFIG_I2C_VOODOO3 is not set
-# CONFIG_I2C_PCA_ISA is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_RTC8564 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_RTC_X1205_I2C is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_I2C is not set
 
 
 #
 #
 # SPI support
 # SPI support
@@ -931,169 +1010,44 @@ CONFIG_I2C_VIAPRO=y
 #
 #
 # Dallas's 1-wire bus
 # Dallas's 1-wire bus
 #
 #
-# CONFIG_W1 is not set
 
 
 #
 #
 # Hardware Monitoring support
 # Hardware Monitoring support
 #
 #
-CONFIG_HWMON=y
-CONFIG_HWMON_VID=y
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-CONFIG_SENSORS_IT87=y
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47B397 is not set
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_HWMON is not set
+# CONFIG_HWMON_VID is not set
 
 
 #
 #
 # Misc devices
 # Misc devices
 #
 #
 # CONFIG_IBM_ASM is not set
 # CONFIG_IBM_ASM is not set
 
 
-#
-# Multimedia Capabilities Port drivers
-#
-
 #
 #
 # Multimedia devices
 # Multimedia devices
 #
 #
-CONFIG_VIDEO_DEV=y
-
-#
-# Video For Linux
-#
-
-#
-# Video Adapters
-#
-# CONFIG_VIDEO_ADV_DEBUG is not set
-# CONFIG_VIDEO_BT848 is not set
-# CONFIG_VIDEO_BWQCAM is not set
-# CONFIG_VIDEO_CQCAM is not set
-# CONFIG_VIDEO_W9966 is not set
-# CONFIG_VIDEO_CPIA is not set
-# CONFIG_VIDEO_SAA5246A is not set
-# CONFIG_VIDEO_SAA5249 is not set
-# CONFIG_TUNER_3036 is not set
-# CONFIG_VIDEO_STRADIS is not set
-# CONFIG_VIDEO_ZORAN is not set
-CONFIG_VIDEO_SAA7134=y
-# CONFIG_VIDEO_SAA7134_ALSA is not set
-# CONFIG_VIDEO_MXB is not set
-# CONFIG_VIDEO_DPC is not set
-# CONFIG_VIDEO_HEXIUM_ORION is not set
-# CONFIG_VIDEO_HEXIUM_GEMINI is not set
-# CONFIG_VIDEO_CX88 is not set
-# CONFIG_VIDEO_EM28XX is not set
-# CONFIG_VIDEO_OVCAMCHIP is not set
-# CONFIG_VIDEO_AUDIO_DECODER is not set
-# CONFIG_VIDEO_DECODER is not set
-
-#
-# Radio Adapters
-#
-# CONFIG_RADIO_GEMTEK_PCI is not set
-# CONFIG_RADIO_MAXIRADIO is not set
-# CONFIG_RADIO_MAESTRO is not set
+# CONFIG_VIDEO_DEV is not set
+CONFIG_VIDEO_V4L2=y
 
 
 #
 #
 # Digital Video Broadcasting Devices
 # Digital Video Broadcasting Devices
 #
 #
 # CONFIG_DVB is not set
 # CONFIG_DVB is not set
-CONFIG_VIDEO_TUNER=y
-CONFIG_VIDEO_BUF=y
-CONFIG_VIDEO_IR=y
+# CONFIG_USB_DABUSB is not set
 
 
 #
 #
 # Graphics support
 # Graphics support
 #
 #
-CONFIG_FB=y
-CONFIG_FB_CFB_FILLRECT=y
-CONFIG_FB_CFB_COPYAREA=y
-CONFIG_FB_CFB_IMAGEBLIT=y
-# CONFIG_FB_MACMODES is not set
-CONFIG_FB_MODE_HELPERS=y
-# CONFIG_FB_TILEBLITTING is not set
-# CONFIG_FB_CIRRUS is not set
-# CONFIG_FB_PM2 is not set
-# CONFIG_FB_CYBER2000 is not set
-# CONFIG_FB_ARC is not set
-# CONFIG_FB_ASILIANT is not set
-# CONFIG_FB_IMSTT is not set
-# CONFIG_FB_VGA16 is not set
-# CONFIG_FB_VESA is not set
-CONFIG_VIDEO_SELECT=y
-# CONFIG_FB_HGA is not set
-# CONFIG_FB_S1D13XXX is not set
-# CONFIG_FB_NVIDIA is not set
-# CONFIG_FB_RIVA is not set
-# CONFIG_FB_I810 is not set
-# CONFIG_FB_INTEL is not set
-# CONFIG_FB_MATROX is not set
-# CONFIG_FB_RADEON_OLD is not set
-CONFIG_FB_RADEON=y
-CONFIG_FB_RADEON_I2C=y
-# CONFIG_FB_RADEON_DEBUG is not set
-# CONFIG_FB_ATY128 is not set
-# CONFIG_FB_ATY is not set
-# CONFIG_FB_SAVAGE is not set
-# CONFIG_FB_SIS is not set
-# CONFIG_FB_NEOMAGIC is not set
-# CONFIG_FB_KYRO is not set
-# CONFIG_FB_3DFX is not set
-# CONFIG_FB_VOODOO1 is not set
-# CONFIG_FB_CYBLA is not set
-# CONFIG_FB_TRIDENT is not set
-# CONFIG_FB_GEODE is not set
-# CONFIG_FB_VIRTUAL is not set
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_FB is not set
 
 
 #
 #
 # Console display driver support
 # Console display driver support
 #
 #
 CONFIG_VGA_CONSOLE=y
 CONFIG_VGA_CONSOLE=y
+CONFIG_VGACON_SOFT_SCROLLBACK=y
+CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
+CONFIG_VIDEO_SELECT=y
 CONFIG_DUMMY_CONSOLE=y
 CONFIG_DUMMY_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
-# CONFIG_FONTS is not set
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-
-#
-# Logo configuration
-#
-# CONFIG_LOGO is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 # CONFIG_BACKLIGHT_LCD_SUPPORT is not set
 
 
 #
 #
@@ -1104,97 +1058,30 @@ CONFIG_SOUND=y
 #
 #
 # Advanced Linux Sound Architecture
 # Advanced Linux Sound Architecture
 #
 #
-CONFIG_SND=y
-CONFIG_SND_TIMER=y
-CONFIG_SND_PCM=y
-CONFIG_SND_RAWMIDI=y
-CONFIG_SND_SEQUENCER=y
-# CONFIG_SND_SEQ_DUMMY is not set
-# CONFIG_SND_MIXER_OSS is not set
-# CONFIG_SND_PCM_OSS is not set
-# CONFIG_SND_SEQUENCER_OSS is not set
-CONFIG_SND_RTCTIMER=y
-CONFIG_SND_SEQ_RTCTIMER_DEFAULT=y
-# CONFIG_SND_DYNAMIC_MINORS is not set
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_VERBOSE_PRINTK is not set
-# CONFIG_SND_DEBUG is not set
-
-#
-# Generic devices
-#
-CONFIG_SND_MPU401_UART=y
-CONFIG_SND_AC97_CODEC=y
-CONFIG_SND_AC97_BUS=y
-# CONFIG_SND_DUMMY is not set
-# CONFIG_SND_VIRMIDI is not set
-# CONFIG_SND_MTPAV is not set
-# CONFIG_SND_SERIAL_U16550 is not set
-# CONFIG_SND_MPU401 is not set
-
-#
-# PCI devices
-#
-# CONFIG_SND_AD1889 is not set
-# CONFIG_SND_ALS4000 is not set
-# CONFIG_SND_ALI5451 is not set
-# CONFIG_SND_ATIIXP is not set
-# CONFIG_SND_ATIIXP_MODEM is not set
-# CONFIG_SND_AU8810 is not set
-# CONFIG_SND_AU8820 is not set
-# CONFIG_SND_AU8830 is not set
-# CONFIG_SND_AZT3328 is not set
-# CONFIG_SND_BT87X is not set
-# CONFIG_SND_CA0106 is not set
-# CONFIG_SND_CMIPCI is not set
-# CONFIG_SND_CS4281 is not set
-# CONFIG_SND_CS46XX is not set
-# CONFIG_SND_CS5535AUDIO is not set
-# CONFIG_SND_EMU10K1 is not set
-# CONFIG_SND_EMU10K1X is not set
-# CONFIG_SND_ENS1370 is not set
-# CONFIG_SND_ENS1371 is not set
-# CONFIG_SND_ES1938 is not set
-# CONFIG_SND_ES1968 is not set
-# CONFIG_SND_FM801 is not set
-# CONFIG_SND_HDA_INTEL is not set
-# CONFIG_SND_HDSP is not set
-# CONFIG_SND_HDSPM is not set
-# CONFIG_SND_ICE1712 is not set
-# CONFIG_SND_ICE1724 is not set
-# CONFIG_SND_INTEL8X0 is not set
-# CONFIG_SND_INTEL8X0M is not set
-# CONFIG_SND_KORG1212 is not set
-# CONFIG_SND_MAESTRO3 is not set
-# CONFIG_SND_MIXART is not set
-# CONFIG_SND_NM256 is not set
-# CONFIG_SND_PCXHR is not set
-# CONFIG_SND_RME32 is not set
-# CONFIG_SND_RME96 is not set
-# CONFIG_SND_RME9652 is not set
-# CONFIG_SND_SONICVIBES is not set
-# CONFIG_SND_TRIDENT is not set
-CONFIG_SND_VIA82XX=y
-# CONFIG_SND_VIA82XX_MODEM is not set
-# CONFIG_SND_VX222 is not set
-# CONFIG_SND_YMFPCI is not set
-
-#
-# USB devices
-#
-# CONFIG_SND_USB_AUDIO is not set
-# CONFIG_SND_USB_USX2Y is not set
+# CONFIG_SND is not set
 
 
 #
 #
 # Open Sound System
 # Open Sound System
 #
 #
-# CONFIG_SOUND_PRIME is not set
+CONFIG_SOUND_PRIME=y
+CONFIG_OSS_OBSOLETE_DRIVER=y
+# CONFIG_SOUND_BT878 is not set
+# CONFIG_SOUND_EMU10K1 is not set
+# CONFIG_SOUND_FUSION is not set
+# CONFIG_SOUND_ES1371 is not set
+CONFIG_SOUND_ICH=y
+# CONFIG_SOUND_TRIDENT is not set
+# CONFIG_SOUND_MSNDCLAS is not set
+# CONFIG_SOUND_MSNDPIN is not set
+# CONFIG_SOUND_VIA82CXXX is not set
+# CONFIG_SOUND_OSS is not set
 
 
 #
 #
 # USB support
 # USB support
 #
 #
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_HCD=y
 CONFIG_USB_ARCH_HAS_OHCI=y
 CONFIG_USB_ARCH_HAS_OHCI=y
+CONFIG_USB_ARCH_HAS_EHCI=y
 CONFIG_USB=y
 CONFIG_USB=y
 # CONFIG_USB_DEBUG is not set
 # CONFIG_USB_DEBUG is not set
 
 
@@ -1213,17 +1100,19 @@ CONFIG_USB_DEVICEFS=y
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_SPLIT_ISO is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
 # CONFIG_USB_EHCI_ROOT_HUB_TT is not set
+# CONFIG_USB_EHCI_TT_NEWSCHED is not set
 # CONFIG_USB_ISP116X_HCD is not set
 # CONFIG_USB_ISP116X_HCD is not set
-# CONFIG_USB_OHCI_HCD is not set
+CONFIG_USB_OHCI_HCD=y
+# CONFIG_USB_OHCI_BIG_ENDIAN is not set
+CONFIG_USB_OHCI_LITTLE_ENDIAN=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 # CONFIG_USB_SL811_HCD is not set
 # CONFIG_USB_SL811_HCD is not set
 
 
 #
 #
 # USB Device Class drivers
 # USB Device Class drivers
 #
 #
-# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
 # CONFIG_USB_ACM is not set
 # CONFIG_USB_ACM is not set
-# CONFIG_USB_PRINTER is not set
+CONFIG_USB_PRINTER=y
 
 
 #
 #
 # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
 # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
@@ -1248,21 +1137,17 @@ CONFIG_USB_STORAGE=y
 #
 #
 # USB Input Devices
 # USB Input Devices
 #
 #
-# CONFIG_USB_HID is not set
-
-#
-# USB HID Boot Protocol drivers
-#
-# CONFIG_USB_KBD is not set
-# CONFIG_USB_MOUSE is not set
+CONFIG_USB_HID=y
+CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
+# CONFIG_HID_FF is not set
+# CONFIG_USB_HIDDEV is not set
 # CONFIG_USB_AIPTEK is not set
 # CONFIG_USB_AIPTEK is not set
 # CONFIG_USB_WACOM is not set
 # CONFIG_USB_WACOM is not set
 # CONFIG_USB_ACECAD is not set
 # CONFIG_USB_ACECAD is not set
 # CONFIG_USB_KBTAB is not set
 # CONFIG_USB_KBTAB is not set
 # CONFIG_USB_POWERMATE is not set
 # CONFIG_USB_POWERMATE is not set
-# CONFIG_USB_MTOUCH is not set
-# CONFIG_USB_ITMTOUCH is not set
-# CONFIG_USB_EGALAX is not set
+# CONFIG_USB_TOUCHSCREEN is not set
 # CONFIG_USB_YEALINK is not set
 # CONFIG_USB_YEALINK is not set
 # CONFIG_USB_XPAD is not set
 # CONFIG_USB_XPAD is not set
 # CONFIG_USB_ATI_REMOTE is not set
 # CONFIG_USB_ATI_REMOTE is not set
@@ -1276,21 +1161,6 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_MDC800 is not set
 # CONFIG_USB_MDC800 is not set
 # CONFIG_USB_MICROTEK is not set
 # CONFIG_USB_MICROTEK is not set
 
 
-#
-# USB Multimedia devices
-#
-# CONFIG_USB_DABUSB is not set
-# CONFIG_USB_VICAM is not set
-# CONFIG_USB_DSBR is not set
-# CONFIG_USB_ET61X251 is not set
-# CONFIG_USB_IBMCAM is not set
-# CONFIG_USB_KONICAWC is not set
-# CONFIG_USB_OV511 is not set
-# CONFIG_USB_SE401 is not set
-# CONFIG_USB_SN9C102 is not set
-# CONFIG_USB_STV680 is not set
-# CONFIG_USB_PWC is not set
-
 #
 #
 # USB Network Adapters
 # USB Network Adapters
 #
 #
@@ -1299,12 +1169,11 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_PEGASUS is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_RTL8150 is not set
 # CONFIG_USB_USBNET is not set
 # CONFIG_USB_USBNET is not set
-# CONFIG_USB_MON is not set
+CONFIG_USB_MON=y
 
 
 #
 #
 # USB port drivers
 # USB port drivers
 #
 #
-# CONFIG_USB_USS720 is not set
 
 
 #
 #
 # USB Serial Converter support
 # USB Serial Converter support
@@ -1321,10 +1190,12 @@ CONFIG_USB_STORAGE=y
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LEGOTOWER is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LCD is not set
 # CONFIG_USB_LED is not set
 # CONFIG_USB_LED is not set
+# CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_CYTHERM is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETKIT is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_PHIDGETSERVO is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_IDMOUSE is not set
+# CONFIG_USB_APPLEDISPLAY is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_LD is not set
 # CONFIG_USB_TEST is not set
 # CONFIG_USB_TEST is not set
@@ -1343,57 +1214,97 @@ CONFIG_USB_STORAGE=y
 #
 #
 # CONFIG_MMC is not set
 # CONFIG_MMC is not set
 
 
+#
+# LED devices
+#
+# CONFIG_NEW_LEDS is not set
+
+#
+# LED drivers
+#
+
+#
+# LED Triggers
+#
+
 #
 #
 # InfiniBand support
 # InfiniBand support
 #
 #
 # CONFIG_INFINIBAND is not set
 # CONFIG_INFINIBAND is not set
 
 
 #
 #
-# SN Devices
+# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
+#
+# CONFIG_EDAC is not set
+
+#
+# Real Time Clock
 #
 #
+# CONFIG_RTC_CLASS is not set
 
 
 #
 #
-# EDAC - error detection and reporting (RAS)
+# DMA Engine support
+#
+# CONFIG_DMA_ENGINE is not set
+
+#
+# DMA Clients
+#
+
+#
+# DMA Devices
 #
 #
-# CONFIG_EDAC is not set
 
 
 #
 #
 # File systems
 # File systems
 #
 #
 CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+# CONFIG_EXT2_FS_SECURITY is not set
 # CONFIG_EXT2_FS_XIP is not set
 # CONFIG_EXT2_FS_XIP is not set
-# CONFIG_EXT3_FS is not set
-# CONFIG_REISERFS_FS is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+CONFIG_EXT3_FS_POSIX_ACL=y
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+CONFIG_REISERFS_FS=y
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+CONFIG_REISERFS_FS_XATTR=y
+CONFIG_REISERFS_FS_POSIX_ACL=y
+# CONFIG_REISERFS_FS_SECURITY is not set
 # CONFIG_JFS_FS is not set
 # CONFIG_JFS_FS is not set
-# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FS_POSIX_ACL=y
 # CONFIG_XFS_FS is not set
 # CONFIG_XFS_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_OCFS2_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_MINIX_FS is not set
 # CONFIG_ROMFS_FS is not set
 # CONFIG_ROMFS_FS is not set
-# CONFIG_INOTIFY is not set
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
 # CONFIG_QUOTA is not set
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
 # CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 # CONFIG_FUSE_FS is not set
 
 
 #
 #
 # CD-ROM/DVD Filesystems
 # CD-ROM/DVD Filesystems
 #
 #
 CONFIG_ISO9660_FS=y
 CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_ZISOFS_FS=y
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
 # CONFIG_UDF_FS is not set
 # CONFIG_UDF_FS is not set
 
 
 #
 #
 # DOS/FAT/NT Filesystems
 # DOS/FAT/NT Filesystems
 #
 #
 CONFIG_FAT_FS=y
 CONFIG_FAT_FS=y
-# CONFIG_MSDOS_FS is not set
+CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=850
+CONFIG_FAT_DEFAULT_CODEPAGE=437
 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
 # CONFIG_NTFS_FS is not set
 # CONFIG_NTFS_FS is not set
 
 
@@ -1404,10 +1315,9 @@ CONFIG_PROC_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_PROC_KCORE=y
 CONFIG_SYSFS=y
 CONFIG_SYSFS=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS=y
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
 CONFIG_RAMFS=y
 CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
 # CONFIG_CONFIGFS_FS is not set
 # CONFIG_CONFIGFS_FS is not set
 
 
 #
 #
@@ -1430,13 +1340,26 @@ CONFIG_RAMFS=y
 #
 #
 # Network File Systems
 # Network File Systems
 #
 #
-# CONFIG_NFS_FS is not set
-# CONFIG_NFSD is not set
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+# CONFIG_NFS_V4 is not set
+# CONFIG_NFS_DIRECTIO is not set
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_NFSD_TCP=y
+CONFIG_ROOT_NFS=y
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_EXPORTFS=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+# CONFIG_RPCSEC_GSS_KRB5 is not set
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
 # CONFIG_SMB_FS is not set
-CONFIG_CIFS=y
-# CONFIG_CIFS_STATS is not set
-# CONFIG_CIFS_XATTR is not set
-# CONFIG_CIFS_EXPERIMENTAL is not set
+# CONFIG_CIFS is not set
 # CONFIG_NCP_FS is not set
 # CONFIG_NCP_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
 # CONFIG_AFS_FS is not set
@@ -1445,33 +1368,18 @@ CONFIG_CIFS=y
 #
 #
 # Partition Types
 # Partition Types
 #
 #
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
+# CONFIG_PARTITION_ADVANCED is not set
 CONFIG_MSDOS_PARTITION=y
 CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
 
 
 #
 #
 # Native Language Support
 # Native Language Support
 #
 #
 CONFIG_NLS=y
 CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="iso8859-15"
-# CONFIG_NLS_CODEPAGE_437 is not set
+CONFIG_NLS_DEFAULT="iso8859-1"
+CONFIG_NLS_CODEPAGE_437=y
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_737 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
 # CONFIG_NLS_CODEPAGE_775 is not set
-CONFIG_NLS_CODEPAGE_850=y
+# CONFIG_NLS_CODEPAGE_850 is not set
 # CONFIG_NLS_CODEPAGE_852 is not set
 # CONFIG_NLS_CODEPAGE_852 is not set
 # CONFIG_NLS_CODEPAGE_855 is not set
 # CONFIG_NLS_CODEPAGE_855 is not set
 # CONFIG_NLS_CODEPAGE_857 is not set
 # CONFIG_NLS_CODEPAGE_857 is not set
@@ -1491,7 +1399,7 @@ CONFIG_NLS_CODEPAGE_850=y
 # CONFIG_NLS_ISO8859_8 is not set
 # CONFIG_NLS_ISO8859_8 is not set
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1250 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
 # CONFIG_NLS_CODEPAGE_1251 is not set
-# CONFIG_NLS_ASCII is not set
+CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_ISO8859_1=y
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_2 is not set
 # CONFIG_NLS_ISO8859_3 is not set
 # CONFIG_NLS_ISO8859_3 is not set
@@ -1510,20 +1418,50 @@ CONFIG_NLS_UTF8=y
 #
 #
 # Instrumentation Support
 # Instrumentation Support
 #
 #
-# CONFIG_PROFILING is not set
-# CONFIG_KPROBES is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_KPROBES=y
 
 
 #
 #
 # Kernel hacking
 # Kernel hacking
 #
 #
+CONFIG_TRACE_IRQFLAGS_SUPPORT=y
 # CONFIG_PRINTK_TIME is not set
 # CONFIG_PRINTK_TIME is not set
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_MAGIC_SYSRQ=y
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_RWSEMS is not set
+# CONFIG_DEBUG_LOCK_ALLOC is not set
+# CONFIG_PROVE_LOCKING is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_FRAME_POINTER is not set
+CONFIG_UNWIND_INFO=y
+CONFIG_STACK_UNWIND=y
+# CONFIG_FORCED_INLINING is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+# CONFIG_DEBUG_RODATA is not set
+# CONFIG_4KSTACKS is not set
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
 CONFIG_X86_MPPARSE=y
+CONFIG_DOUBLEFAULT=y
 
 
 #
 #
 # Security options
 # Security options
@@ -1536,10 +1474,6 @@ CONFIG_X86_MPPARSE=y
 #
 #
 # CONFIG_CRYPTO is not set
 # CONFIG_CRYPTO is not set
 
 
-#
-# Hardware crypto devices
-#
-
 #
 #
 # Library routines
 # Library routines
 #
 #
@@ -1548,7 +1482,12 @@ CONFIG_X86_MPPARSE=y
 CONFIG_CRC32=y
 CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
 CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_IRQ_PROBE=y
+CONFIG_GENERIC_PENDING_IRQ=y
+CONFIG_X86_SMP=y
+CONFIG_X86_HT=y
 CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_BIOS_REBOOT=y
+CONFIG_X86_TRAMPOLINE=y
 CONFIG_KTIME_SCALAR=y
 CONFIG_KTIME_SCALAR=y

+ 2 - 1
arch/i386/kernel/Makefile

@@ -4,7 +4,7 @@
 
 
 extra-y := head.o init_task.o vmlinux.lds
 extra-y := head.o init_task.o vmlinux.lds
 
 
-obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o \
+obj-y	:= process.o signal.o entry.o traps.o irq.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o bootflag.o \
 		pci-dma.o i386_ksyms.o i387.o bootflag.o \
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
 		quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
@@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
 	$(call if_changed,syscall)
 	$(call if_changed,syscall)
 
 
 k8-y                      += ../../x86_64/kernel/k8.o
 k8-y                      += ../../x86_64/kernel/k8.o
+stacktrace-y		  += ../../x86_64/kernel/stacktrace.o
 
 

+ 2 - 0
arch/i386/kernel/acpi/Makefile

@@ -1,5 +1,7 @@
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI)		+= boot.o
+ifneq ($(CONFIG_PCI),)
 obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
 obj-$(CONFIG_X86_IO_APIC)	+= earlyquirk.o
+endif
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup.o
 
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 ifneq ($(CONFIG_ACPI_PROCESSOR),)

+ 172 - 9
arch/i386/kernel/acpi/boot.c

@@ -26,9 +26,12 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
 #include <linux/efi.h>
+#include <linux/cpumask.h>
 #include <linux/module.h>
 #include <linux/module.h>
 #include <linux/dmi.h>
 #include <linux/dmi.h>
 #include <linux/irq.h>
 #include <linux/irq.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
 
 
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
 #include <asm/io_apic.h>
@@ -36,11 +39,17 @@
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 
 
-#ifdef	CONFIG_X86_64
+static int __initdata acpi_force = 0;
 
 
-extern void __init clustered_apic_check(void);
+#ifdef	CONFIG_ACPI
+int acpi_disabled = 0;
+#else
+int acpi_disabled = 1;
+#endif
+EXPORT_SYMBOL(acpi_disabled);
+
+#ifdef	CONFIG_X86_64
 
 
-extern int gsi_irq_sharing(int gsi);
 #include <asm/proto.h>
 #include <asm/proto.h>
 
 
 static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
 static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
@@ -506,16 +515,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 int acpi_map_lsapic(acpi_handle handle, int *pcpu)
 {
 {
-	/* TBD */
-	return -EINVAL;
+	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *obj;
+	struct acpi_table_lapic *lapic;
+	cpumask_t tmp_map, new_map;
+	u8 physid;
+	int cpu;
+
+	if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
+		return -EINVAL;
+
+	if (!buffer.length || !buffer.pointer)
+		return -EINVAL;
+
+	obj = buffer.pointer;
+	if (obj->type != ACPI_TYPE_BUFFER ||
+	    obj->buffer.length < sizeof(*lapic)) {
+		kfree(buffer.pointer);
+		return -EINVAL;
+	}
+
+	lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
+
+	if ((lapic->header.type != ACPI_MADT_LAPIC) ||
+	    (!lapic->flags.enabled)) {
+		kfree(buffer.pointer);
+		return -EINVAL;
+	}
+
+	physid = lapic->id;
+
+	kfree(buffer.pointer);
+	buffer.length = ACPI_ALLOCATE_BUFFER;
+	buffer.pointer = NULL;
+
+	tmp_map = cpu_present_map;
+	mp_register_lapic(physid, lapic->flags.enabled);
+
+	/*
+	 * If mp_register_lapic successfully generates a new logical cpu
+	 * number, then the following will get us exactly what was mapped
+	 */
+	cpus_andnot(new_map, cpu_present_map, tmp_map);
+	if (cpus_empty(new_map)) {
+		printk ("Unable to map lapic to logical cpu number\n");
+		return -EINVAL;
+	}
+
+	cpu = first_cpu(new_map);
+
+	*pcpu = cpu;
+	return 0;
 }
 }
 
 
 EXPORT_SYMBOL(acpi_map_lsapic);
 EXPORT_SYMBOL(acpi_map_lsapic);
 
 
 int acpi_unmap_lsapic(int cpu)
 int acpi_unmap_lsapic(int cpu)
 {
 {
-	/* TBD */
-	return -EINVAL;
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
+			x86_acpiid_to_apicid[i] = -1;
+			break;
+		}
+	}
+	x86_cpu_to_apicid[cpu] = -1;
+	cpu_clear(cpu, cpu_present_map);
+	num_processors--;
+
+	return (0);
 }
 }
 
 
 EXPORT_SYMBOL(acpi_unmap_lsapic);
 EXPORT_SYMBOL(acpi_unmap_lsapic);
@@ -579,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
 static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 {
 {
 	struct acpi_table_hpet *hpet_tbl;
 	struct acpi_table_hpet *hpet_tbl;
+	struct resource *hpet_res;
+	resource_size_t res_start;
 
 
 	if (!phys || !size)
 	if (!phys || !size)
 		return -EINVAL;
 		return -EINVAL;
@@ -594,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 		       "memory.\n");
 		       "memory.\n");
 		return -1;
 		return -1;
 	}
 	}
+
+#define HPET_RESOURCE_NAME_SIZE 9
+	hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
+	if (hpet_res) {
+		memset(hpet_res, 0, sizeof(*hpet_res));
+		hpet_res->name = (void *)&hpet_res[1];
+		hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
+			 "HPET %u", hpet_tbl->number);
+		hpet_res->end = (1 * 1024) - 1;
+	}
+
 #ifdef	CONFIG_X86_64
 #ifdef	CONFIG_X86_64
 	vxtime.hpet_address = hpet_tbl->addr.addrl |
 	vxtime.hpet_address = hpet_tbl->addr.addrl |
 	    ((long)hpet_tbl->addr.addrh << 32);
 	    ((long)hpet_tbl->addr.addrh << 32);
 
 
 	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
 	printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
 	       hpet_tbl->id, vxtime.hpet_address);
 	       hpet_tbl->id, vxtime.hpet_address);
+
+	res_start = vxtime.hpet_address;
 #else				/* X86 */
 #else				/* X86 */
 	{
 	{
 		extern unsigned long hpet_address;
 		extern unsigned long hpet_address;
@@ -607,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
 		hpet_address = hpet_tbl->addr.addrl;
 		hpet_address = hpet_tbl->addr.addrl;
 		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
 		printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
 		       hpet_tbl->id, hpet_address);
 		       hpet_tbl->id, hpet_address);
+
+		res_start = hpet_address;
 	}
 	}
 #endif				/* X86 */
 #endif				/* X86 */
 
 
+	if (hpet_res) {
+		hpet_res->start = res_start;
+		hpet_res->end += res_start;
+		insert_resource(&iomem_resource, hpet_res);
+	}
+
 	return 0;
 	return 0;
 }
 }
 #else
 #else
@@ -860,8 +953,6 @@ static void __init acpi_process_madt(void)
 	return;
 	return;
 }
 }
 
 
-extern int acpi_force;
-
 #ifdef __i386__
 #ifdef __i386__
 
 
 static int __init disable_acpi_irq(struct dmi_system_id *d)
 static int __init disable_acpi_irq(struct dmi_system_id *d)
@@ -1163,3 +1254,75 @@ int __init acpi_boot_init(void)
 
 
 	return 0;
 	return 0;
 }
 }
+
+static int __init parse_acpi(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
+
+	/* "acpi=off" disables both ACPI table parsing and interpreter */
+	if (strcmp(arg, "off") == 0) {
+		disable_acpi();
+	}
+	/* acpi=force to over-ride black-list */
+	else if (strcmp(arg, "force") == 0) {
+		acpi_force = 1;
+		acpi_ht = 1;
+		acpi_disabled = 0;
+	}
+	/* acpi=strict disables out-of-spec workarounds */
+	else if (strcmp(arg, "strict") == 0) {
+		acpi_strict = 1;
+	}
+	/* Limit ACPI just to boot-time to enable HT */
+	else if (strcmp(arg, "ht") == 0) {
+		if (!acpi_force)
+			disable_acpi();
+		acpi_ht = 1;
+	}
+	/* "acpi=noirq" disables ACPI interrupt routing */
+	else if (strcmp(arg, "noirq") == 0) {
+		acpi_noirq_set();
+	} else {
+		/* Core will printk when we return error. */
+		return -EINVAL;
+	}
+	return 0;
+}
+early_param("acpi", parse_acpi);
+
+/* FIXME: Using pci= for an ACPI parameter is a travesty. */
+static int __init parse_pci(char *arg)
+{
+	if (arg && strcmp(arg, "noacpi") == 0)
+		acpi_disable_pci();
+	return 0;
+}
+early_param("pci", parse_pci);
+
+#ifdef CONFIG_X86_IO_APIC
+static int __init parse_acpi_skip_timer_override(char *arg)
+{
+	acpi_skip_timer_override = 1;
+	return 0;
+}
+early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
+#endif /* CONFIG_X86_IO_APIC */
+
+static int __init setup_acpi_sci(char *s)
+{
+	if (!s)
+		return -EINVAL;
+	if (!strcmp(s, "edge"))
+		acpi_sci_flags.trigger = 1;
+	else if (!strcmp(s, "level"))
+		acpi_sci_flags.trigger = 3;
+	else if (!strcmp(s, "high"))
+		acpi_sci_flags.polarity = 1;
+	else if (!strcmp(s, "low"))
+		acpi_sci_flags.polarity = 3;
+	else
+		return -EINVAL;
+	return 0;
+}
+early_param("acpi_sci", setup_acpi_sci);

+ 5 - 1
arch/i386/kernel/acpi/earlyquirk.c

@@ -48,7 +48,11 @@ void __init check_acpi_pci(void)
 	int num, slot, func;
 	int num, slot, func;
 
 
 	/* Assume the machine supports type 1. If not it will 
 	/* Assume the machine supports type 1. If not it will 
-	   always read ffffffff and should not have any side effect. */
+	   always read ffffffff and should not have any side effect.
+	   Actually a few buggy systems can machine check. Allow the user
+	   to disable it by command line option at least -AK */
+	if (!early_pci_allowed())
+		return;
 
 
 	/* Poor man's PCI discovery */
 	/* Poor man's PCI discovery */
 	for (num = 0; num < 32; num++) {
 	for (num = 0; num < 32; num++) {

+ 28 - 3
arch/i386/kernel/apic.c

@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi;
 /*
 /*
  * Knob to control our willingness to enable the local APIC.
  * Knob to control our willingness to enable the local APIC.
  */
  */
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+static inline void lapic_disable(void)
+{
+	enable_local_apic = -1;
+	clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+}
+
+static inline void lapic_enable(void)
+{
+	enable_local_apic = 1;
+}
 
 
 /*
 /*
  * Debug level
  * Debug level
@@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void)
 			printk("No ESR for 82489DX.\n");
 			printk("No ESR for 82489DX.\n");
 	}
 	}
 
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		setup_apic_nmi_watchdog();
+	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
 	apic_pm_activate();
 }
 }
 
 
@@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void)
 
 
 	return 0;
 	return 0;
 }
 }
+
+static int __init parse_lapic(char *arg)
+{
+	lapic_enable();
+	return 0;
+}
+early_param("lapic", parse_lapic);
+
+static int __init parse_nolapic(char *arg)
+{
+	lapic_disable();
+	return 0;
+}
+early_param("nolapic", parse_nolapic);
+

+ 3 - 4
arch/i386/kernel/cpu/amd.c

@@ -22,7 +22,7 @@
 extern void vide(void);
 extern void vide(void);
 __asm__(".align 4\nvide: ret");
 __asm__(".align 4\nvide: ret");
 
 
-static void __init init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 {
 {
 	u32 l, h;
 	u32 l, h;
 	int mbytes = num_physpages >> (20-PAGE_SHIFT);
 	int mbytes = num_physpages >> (20-PAGE_SHIFT);
@@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 		num_cache_leaves = 3;
 		num_cache_leaves = 3;
 }
 }
 
 
-static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 {
 {
 	/* AMD errata T13 (order #21922) */
 	/* AMD errata T13 (order #21922) */
 	if ((c->x86 == 6)) {
 	if ((c->x86 == 6)) {
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 	return size;
 	return size;
 }
 }
 
 
-static struct cpu_dev amd_cpu_dev __initdata = {
+static struct cpu_dev amd_cpu_dev __cpuinitdata = {
 	.c_vendor	= "AMD",
 	.c_vendor	= "AMD",
 	.c_ident 	= { "AuthenticAMD" },
 	.c_ident 	= { "AuthenticAMD" },
 	.c_models = {
 	.c_models = {
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = {
 		},
 		},
 	},
 	},
 	.c_init		= init_amd,
 	.c_init		= init_amd,
-	.c_identify	= generic_identify,
 	.c_size_cache	= amd_size_cache,
 	.c_size_cache	= amd_size_cache,
 };
 };
 
 

+ 12 - 12
arch/i386/kernel/cpu/centaur.c

@@ -9,7 +9,7 @@
 
 
 #ifdef CONFIG_X86_OOSTORE
 #ifdef CONFIG_X86_OOSTORE
 
 
-static u32 __init power2(u32 x)
+static u32 __cpuinit power2(u32 x)
 {
 {
 	u32 s=1;
 	u32 s=1;
 	while(s<=x)
 	while(s<=x)
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x)
  *	Set up an actual MCR
  *	Set up an actual MCR
  */
  */
  
  
-static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
+static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
 {
 {
 	u32 lo, hi;
 	u32 lo, hi;
 	
 	
@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
  *	Shortcut: We know you can't put 4Gig of RAM on a winchip
  *	Shortcut: We know you can't put 4Gig of RAM on a winchip
  */
  */
 
 
-static u32 __init ramtop(void)		/* 16388 */
+static u32 __cpuinit ramtop(void)		/* 16388 */
 {
 {
 	int i;
 	int i;
 	u32 top = 0;
 	u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __init ramtop(void)		/* 16388 */
  *	Compute a set of MCR's to give maximum coverage
  *	Compute a set of MCR's to give maximum coverage
  */
  */
 
 
-static int __init centaur_mcr_compute(int nr, int key)
+static int __cpuinit centaur_mcr_compute(int nr, int key)
 {
 {
 	u32 mem = ramtop();
 	u32 mem = ramtop();
 	u32 root = power2(mem);
 	u32 root = power2(mem);
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key)
 	return ct;
 	return ct;
 }
 }
 
 
-static void __init centaur_create_optimal_mcr(void)
+static void __cpuinit centaur_create_optimal_mcr(void)
 {
 {
 	int i;
 	int i;
 	/*
 	/*
@@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void)
 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
 		wrmsr(MSR_IDT_MCR0+i, 0, 0);
 }
 }
 
 
-static void __init winchip2_create_optimal_mcr(void)
+static void __cpuinit winchip2_create_optimal_mcr(void)
 {
 {
 	u32 lo, hi;
 	u32 lo, hi;
 	int i;
 	int i;
@@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void)
  *	Handle the MCR key on the Winchip 2.
  *	Handle the MCR key on the Winchip 2.
  */
  */
 
 
-static void __init winchip2_unprotect_mcr(void)
+static void __cpuinit winchip2_unprotect_mcr(void)
 {
 {
 	u32 lo, hi;
 	u32 lo, hi;
 	u32 key;
 	u32 key;
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void)
 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
 	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
 }
 }
 
 
-static void __init winchip2_protect_mcr(void)
+static void __cpuinit winchip2_protect_mcr(void)
 {
 {
 	u32 lo, hi;
 	u32 lo, hi;
 	
 	
@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void)
 #define RNG_ENABLED	(1 << 3)
 #define RNG_ENABLED	(1 << 3)
 #define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
 #define RNG_ENABLE	(1 << 6)	/* MSR_VIA_RNG */
 
 
-static void __init init_c3(struct cpuinfo_x86 *c)
+static void __cpuinit init_c3(struct cpuinfo_x86 *c)
 {
 {
 	u32  lo, hi;
 	u32  lo, hi;
 
 
@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c)
 	display_cacheinfo(c);
 	display_cacheinfo(c);
 }
 }
 
 
-static void __init init_centaur(struct cpuinfo_x86 *c)
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
 {
 	enum {
 	enum {
 		ECX8=1<<1,
 		ECX8=1<<1,
@@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c)
 	}
 	}
 }
 }
 
 
-static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 {
 {
 	/* VIA C3 CPUs (670-68F) need further shifting. */
 	/* VIA C3 CPUs (670-68F) need further shifting. */
 	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
 	if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size
 	return size;
 	return size;
 }
 }
 
 
-static struct cpu_dev centaur_cpu_dev __initdata = {
+static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Centaur",
 	.c_vendor	= "Centaur",
 	.c_ident	= { "CentaurHauls" },
 	.c_ident	= { "CentaurHauls" },
 	.c_init		= init_centaur,
 	.c_init		= init_centaur,

+ 4 - 4
arch/i386/kernel/cpu/common.c

@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
 
 
 extern int disable_pse;
 extern int disable_pse;
 
 
-static void default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
 {
 {
 	/* Not much we can do here... */
 	/* Not much we can do here... */
 	/* Check if at least it has cpuid */
 	/* Check if at least it has cpuid */
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c)
 	}
 	}
 }
 }
 
 
-static struct cpu_dev default_cpu = {
+static struct cpu_dev __cpuinitdata default_cpu = {
 	.c_init	= default_init,
 	.c_init	= default_init,
 	.c_vendor = "Unknown",
 	.c_vendor = "Unknown",
 };
 };
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void)
 	}
 	}
 }
 }
 
 
-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
 {
 {
 	u32 tfms, xlvl;
 	u32 tfms, xlvl;
 	int ebx;
 	int ebx;
@@ -675,7 +675,7 @@ old_gdt:
 #endif
 #endif
 
 
 	/* Clear %fs and %gs. */
 	/* Clear %fs and %gs. */
-	asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
+	asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
 
 
 	/* Clear all 6 debug registers: */
 	/* Clear all 6 debug registers: */
 	set_debugreg(0, 0);
 	set_debugreg(0, 0);

+ 0 - 2
arch/i386/kernel/cpu/cpu.h

@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern int get_model_name(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
 extern void display_cacheinfo(struct cpuinfo_x86 *c);
 
 
-extern void generic_identify(struct cpuinfo_x86 * c);
-
 extern void early_intel_workaround(struct cpuinfo_x86 *c);
 extern void early_intel_workaround(struct cpuinfo_x86 *c);
 
 

+ 20 - 22
arch/i386/kernel/cpu/cyrix.c

@@ -12,7 +12,7 @@
 /*
 /*
  * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
  * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
  */
  */
-static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
+static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
 {
 {
 	unsigned char ccr2, ccr3;
 	unsigned char ccr2, ccr3;
 	unsigned long flags;
 	unsigned long flags;
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
  * Actually since bugs.h doesn't even reference this perhaps someone should
  * Actually since bugs.h doesn't even reference this perhaps someone should
  * fix the documentation ???
  * fix the documentation ???
  */
  */
-static unsigned char Cx86_dir0_msb __initdata = 0;
+static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
 
 
-static char Cx86_model[][9] __initdata = {
+static char Cx86_model[][9] __cpuinitdata = {
 	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
 	"Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
 	"M II ", "Unknown"
 	"M II ", "Unknown"
 };
 };
-static char Cx486_name[][5] __initdata = {
+static char Cx486_name[][5] __cpuinitdata = {
 	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
 	"SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
 	"SRx2", "DRx2"
 	"SRx2", "DRx2"
 };
 };
-static char Cx486S_name[][4] __initdata = {
+static char Cx486S_name[][4] __cpuinitdata = {
 	"S", "S2", "Se", "S2e"
 	"S", "S2", "Se", "S2e"
 };
 };
-static char Cx486D_name[][4] __initdata = {
+static char Cx486D_name[][4] __cpuinitdata = {
 	"DX", "DX2", "?", "?", "?", "DX4"
 	"DX", "DX2", "?", "?", "?", "DX4"
 };
 };
-static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock";
-static char cyrix_model_mult1[] __initdata = "12??43";
-static char cyrix_model_mult2[] __initdata = "12233445";
+static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
+static char cyrix_model_mult1[] __cpuinitdata = "12??43";
+static char cyrix_model_mult2[] __cpuinitdata = "12233445";
 
 
 /*
 /*
  * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
  * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445";
 
 
 extern void calibrate_delay(void) __init;
 extern void calibrate_delay(void) __init;
 
 
-static void __init check_cx686_slop(struct cpuinfo_x86 *c)
+static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	
 	
@@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c)
 }
 }
 
 
 
 
-static void __init set_cx86_reorder(void)
+static void __cpuinit set_cx86_reorder(void)
 {
 {
 	u8 ccr3;
 	u8 ccr3;
 
 
@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void)
 	setCx86(CX86_CCR3, ccr3);
 	setCx86(CX86_CCR3, ccr3);
 }
 }
 
 
-static void __init set_cx86_memwb(void)
+static void __cpuinit set_cx86_memwb(void)
 {
 {
 	u32 cr0;
 	u32 cr0;
 
 
@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void)
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 	setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
 }
 }
 
 
-static void __init set_cx86_inc(void)
+static void __cpuinit set_cx86_inc(void)
 {
 {
 	unsigned char ccr3;
 	unsigned char ccr3;
 
 
@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void)
  *	Configure later MediaGX and/or Geode processor.
  *	Configure later MediaGX and/or Geode processor.
  */
  */
 
 
-static void __init geode_configure(void)
+static void __cpuinit geode_configure(void)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	u8 ccr3, ccr4;
 	u8 ccr3, ccr4;
@@ -184,14 +184,14 @@ static void __init geode_configure(void)
 
 
 
 
 #ifdef CONFIG_PCI
 #ifdef CONFIG_PCI
-static struct pci_device_id __initdata cyrix_55x0[] = {
+static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
 	{ },
 	{ },
 };
 };
 #endif
 #endif
 
 
-static void __init init_cyrix(struct cpuinfo_x86 *c)
+static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
 {
 {
 	unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
 	unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
 	char *buf = c->x86_model_id;
 	char *buf = c->x86_model_id;
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
 /*
 /*
  * Handle National Semiconductor branded processors
  * Handle National Semiconductor branded processors
  */
  */
-static void __init init_nsc(struct cpuinfo_x86 *c)
+static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
 {
 {
 	/* There may be GX1 processors in the wild that are branded
 	/* There may be GX1 processors in the wild that are branded
 	 * NSC and not Cyrix.
 	 * NSC and not Cyrix.
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
 	return (unsigned char) (test >> 8) == 0x02;
 	return (unsigned char) (test >> 8) == 0x02;
 }
 }
 
 
-static void cyrix_identify(struct cpuinfo_x86 * c)
+static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
 {
 {
 	/* Detect Cyrix with disabled CPUID */
 	/* Detect Cyrix with disabled CPUID */
 	if ( c->x86 == 4 && test_cyrix_52div() ) {
 	if ( c->x86 == 4 && test_cyrix_52div() ) {
@@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c)
 			local_irq_restore(flags);
 			local_irq_restore(flags);
 		}
 		}
 	}
 	}
-	generic_identify(c);
 }
 }
 
 
-static struct cpu_dev cyrix_cpu_dev __initdata = {
+static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Cyrix",
 	.c_vendor	= "Cyrix",
 	.c_ident 	= { "CyrixInstead" },
 	.c_ident 	= { "CyrixInstead" },
 	.c_init		= init_cyrix,
 	.c_init		= init_cyrix,
@@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void)
 
 
 late_initcall(cyrix_exit_cpu);
 late_initcall(cyrix_exit_cpu);
 
 
-static struct cpu_dev nsc_cpu_dev __initdata = {
+static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
 	.c_vendor	= "NSC",
 	.c_vendor	= "NSC",
 	.c_ident 	= { "Geode by NSC" },
 	.c_ident 	= { "Geode by NSC" },
 	.c_init		= init_nsc,
 	.c_init		= init_nsc,
-	.c_identify	= generic_identify,
 };
 };
 
 
 int __init nsc_init_cpu(void)
 int __init nsc_init_cpu(void)

+ 1 - 2
arch/i386/kernel/cpu/intel.c

@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 }
 }
 
 
 
 
-static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
 {
 {
 	/* Intel PIII Tualatin. This comes in two flavours.
 	/* Intel PIII Tualatin. This comes in two flavours.
 	 * One has 256kb of cache, the other 512. We have no way
 	 * One has 256kb of cache, the other 512. We have no way
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
 		},
 		},
 	},
 	},
 	.c_init		= init_intel,
 	.c_init		= init_intel,
-	.c_identify	= generic_identify,
 	.c_size_cache	= intel_size_cache,
 	.c_size_cache	= intel_size_cache,
 };
 };
 
 

+ 1 - 1
arch/i386/kernel/cpu/mcheck/Makefile

@@ -1,2 +1,2 @@
-obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o
+obj-y	=	mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+=	non-fatal.o
 obj-$(CONFIG_X86_MCE_NONFATAL)	+=	non-fatal.o

+ 9 - 17
arch/i386/kernel/cpu/mcheck/p4.c

@@ -13,6 +13,8 @@
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
 
 
+#include <asm/therm_throt.h>
+
 #include "mce.h"
 #include "mce.h"
 
 
 /* as supported by the P4/Xeon family */
 /* as supported by the P4/Xeon family */
@@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
 /* P4/Xeon Thermal transition interrupt handler */
 /* P4/Xeon Thermal transition interrupt handler */
 static void intel_thermal_interrupt(struct pt_regs *regs)
 static void intel_thermal_interrupt(struct pt_regs *regs)
 {
 {
-	u32 l, h;
-	unsigned int cpu = smp_processor_id();
-	static unsigned long next[NR_CPUS];
+	__u64 msr_val;
 
 
 	ack_APIC_irq();
 	ack_APIC_irq();
 
 
-	if (time_after(next[cpu], jiffies))
-		return;
-
-	next[cpu] = jiffies + HZ*5;
-	rdmsr(MSR_IA32_THERM_STATUS, l, h);
-	if (l & 0x1) {
-		printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-		printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-				cpu);
-		add_taint(TAINT_MACHINE_CHECK);
-	} else {
-		printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
-	}
+	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+	therm_throt_process(msr_val & 0x1);
 }
 }
 
 
 /* Thermal interrupt handler for this CPU setup */
 /* Thermal interrupt handler for this CPU setup */
@@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
 	
 	
 	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
 	rdmsr (MSR_IA32_MISC_ENABLE, l, h);
 	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
 	wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
-	
+
 	l = apic_read (APIC_LVTTHMR);
 	l = apic_read (APIC_LVTTHMR);
 	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
 	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
 	printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
+
+	/* enable thermal throttle processing */
+	atomic_set(&therm_throt_en, 1);
 	return;
 	return;
 }
 }
 #endif /* CONFIG_X86_MCE_P4THERMAL */
 #endif /* CONFIG_X86_MCE_P4THERMAL */

+ 180 - 0
arch/i386/kernel/cpu/mcheck/therm_throt.c

@@ -0,0 +1,180 @@
+/*
+ * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c
+ *
+ * Thermal throttle event support code (such as syslog messaging and rate
+ * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ * This allows consistent reporting of CPU thermal throttle events.
+ *
+ * Maintains a counter in /sys that keeps track of the number of thermal
+ * events, such that the user knows how bad the thermal problem might be
+ * (since the logging to syslog and mcelog is rate limited).
+ *
+ * Author: Dmitriy Zavin (dmitriyz@google.com)
+ *
+ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
+ *          Inspired by Ross Biro's and Al Borchers' counter code.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sysdev.h>
+#include <linux/cpu.h>
+#include <asm/cpu.h>
+#include <linux/notifier.h>
+#include <asm/therm_throt.h>
+
+/* How long to wait between reporting thermal events */
+#define CHECK_INTERVAL              (300 * HZ)
+
+static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
+atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
+#define define_therm_throt_sysdev_one_ro(_name)                              \
+        static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
+
+#define define_therm_throt_sysdev_show_func(name)                            \
+static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,        \
+                                              char *buf)                     \
+{                                                                            \
+	unsigned int cpu = dev->id;                                          \
+	ssize_t ret;                                                         \
+                                                                             \
+	preempt_disable();              /* CPU hotplug */                    \
+	if (cpu_online(cpu))                                                 \
+		ret = sprintf(buf, "%lu\n",                                  \
+			      per_cpu(thermal_throttle_##name, cpu));        \
+	else                                                                 \
+		ret = 0;                                                     \
+	preempt_enable();                                                    \
+                                                                             \
+	return ret;                                                          \
+}
+
+define_therm_throt_sysdev_show_func(count);
+define_therm_throt_sysdev_one_ro(count);
+
+static struct attribute *thermal_throttle_attrs[] = {
+	&attr_count.attr,
+	NULL
+};
+
+static struct attribute_group thermal_throttle_attr_group = {
+	.attrs = thermal_throttle_attrs,
+	.name = "thermal_throttle"
+};
+#endif /* CONFIG_SYSFS */
+
+/***
+ * therm_throt_process - Process thermal throttling event from interrupt
+ * @curr: Whether the condition is current or not (boolean), since the
+ *        thermal interrupt normally gets called both when the thermal
+ *        event begins and once the event has ended.
+ *
+ * This function is called by the thermal interrupt after the
+ * IRQ has been acknowledged.
+ *
+ * It will take care of rate limiting and printing messages to the syslog.
+ *
+ * Returns: 0 : Event should NOT be further logged, i.e. still in
+ *              "timeout" from previous log message.
+ *          1 : Event should be logged further, and a message has been
+ *              printed to the syslog.
+ */
+int therm_throt_process(int curr)
+{
+	unsigned int cpu = smp_processor_id();
+	__u64 tmp_jiffs = get_jiffies_64();
+
+	if (curr)
+		__get_cpu_var(thermal_throttle_count)++;
+
+	if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+		return 0;
+
+	__get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+
+	/* if we just entered the thermal event */
+	if (curr) {
+		printk(KERN_CRIT "CPU%d: Temperature above threshold, "
+		       "cpu clock throttled (total events = %lu)\n", cpu,
+		       __get_cpu_var(thermal_throttle_count));
+
+		add_taint(TAINT_MACHINE_CHECK);
+	} else {
+		printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
+	}
+
+	return 1;
+}
+
+#ifdef CONFIG_SYSFS
+/* Add/Remove thermal_throttle interface for CPU device */
+static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
+{
+	sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
+{
+	sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+	return 0;
+}
+
+/* Mutex protecting device creation against CPU hotplug */
+static DEFINE_MUTEX(therm_cpu_lock);
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
+						   unsigned long action,
+						   void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	mutex_lock(&therm_cpu_lock);
+	switch (action) {
+	case CPU_ONLINE:
+		thermal_throttle_add_dev(sys_dev);
+		break;
+	case CPU_DEAD:
+		thermal_throttle_remove_dev(sys_dev);
+		break;
+	}
+	mutex_unlock(&therm_cpu_lock);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block thermal_throttle_cpu_notifier =
+{
+	.notifier_call = thermal_throttle_cpu_callback,
+};
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static __init int thermal_throttle_init_device(void)
+{
+	unsigned int cpu = 0;
+
+	if (!atomic_read(&therm_throt_en))
+		return 0;
+
+	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
+
+#ifdef CONFIG_HOTPLUG_CPU
+	mutex_lock(&therm_cpu_lock);
+#endif
+	/* connect live CPUs to sysfs */
+	for_each_online_cpu(cpu)
+		thermal_throttle_add_dev(get_cpu_sysdev(cpu));
+#ifdef CONFIG_HOTPLUG_CPU
+	mutex_unlock(&therm_cpu_lock);
+#endif
+
+	return 0;
+}
+
+device_initcall(thermal_throttle_init_device);
+#endif /* CONFIG_SYSFS */

+ 4 - 5
arch/i386/kernel/cpu/nexgen.c

@@ -10,7 +10,7 @@
  *	to have CPUID. (Thanks to Herbert Oppmann)
  *	to have CPUID. (Thanks to Herbert Oppmann)
  */
  */
  
  
-static int __init deep_magic_nexgen_probe(void)
+static int __cpuinit deep_magic_nexgen_probe(void)
 {
 {
 	int ret;
 	int ret;
 	
 	
@@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void)
 	return  ret;
 	return  ret;
 }
 }
 
 
-static void __init init_nexgen(struct cpuinfo_x86 * c)
+static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
 {
 {
 	c->x86_cache_size = 256; /* A few had 1 MB... */
 	c->x86_cache_size = 256; /* A few had 1 MB... */
 }
 }
 
 
-static void __init nexgen_identify(struct cpuinfo_x86 * c)
+static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
 {
 {
 	/* Detect NexGen with old hypercode */
 	/* Detect NexGen with old hypercode */
 	if ( deep_magic_nexgen_probe() ) {
 	if ( deep_magic_nexgen_probe() ) {
 		strcpy(c->x86_vendor_id, "NexGenDriven");
 		strcpy(c->x86_vendor_id, "NexGenDriven");
 	}
 	}
-	generic_identify(c);
 }
 }
 
 
-static struct cpu_dev nexgen_cpu_dev __initdata = {
+static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Nexgen",
 	.c_vendor	= "Nexgen",
 	.c_ident	= { "NexGenDriven" },
 	.c_ident	= { "NexGenDriven" },
 	.c_models = {
 	.c_models = {

+ 2 - 2
arch/i386/kernel/cpu/proc.c

@@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 
 		/* Intel-defined (#2) */
 		/* Intel-defined (#2) */
 		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
 		"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
-		"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
-		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+		"tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
+		NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
 
 
 		/* VIA/Cyrix/Centaur-defined */
 		/* VIA/Cyrix/Centaur-defined */

+ 2 - 2
arch/i386/kernel/cpu/rise.c

@@ -5,7 +5,7 @@
 
 
 #include "cpu.h"
 #include "cpu.h"
 
 
-static void __init init_rise(struct cpuinfo_x86 *c)
+static void __cpuinit init_rise(struct cpuinfo_x86 *c)
 {
 {
 	printk("CPU: Rise iDragon");
 	printk("CPU: Rise iDragon");
 	if (c->x86_model > 2)
 	if (c->x86_model > 2)
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c)
 	set_bit(X86_FEATURE_CX8, c->x86_capability);
 	set_bit(X86_FEATURE_CX8, c->x86_capability);
 }
 }
 
 
-static struct cpu_dev rise_cpu_dev __initdata = {
+static struct cpu_dev rise_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Rise",
 	.c_vendor	= "Rise",
 	.c_ident	= { "RiseRiseRise" },
 	.c_ident	= { "RiseRiseRise" },
 	.c_models = {
 	.c_models = {

+ 3 - 4
arch/i386/kernel/cpu/transmeta.c

@@ -5,7 +5,7 @@
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include "cpu.h"
 #include "cpu.h"
 
 
-static void __init init_transmeta(struct cpuinfo_x86 *c)
+static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
 {
 {
 	unsigned int cap_mask, uk, max, dummy;
 	unsigned int cap_mask, uk, max, dummy;
 	unsigned int cms_rev1, cms_rev2;
 	unsigned int cms_rev1, cms_rev2;
@@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
 #endif
 #endif
 }
 }
 
 
-static void __init transmeta_identify(struct cpuinfo_x86 * c)
+static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
 {
 {
 	u32 xlvl;
 	u32 xlvl;
-	generic_identify(c);
 
 
 	/* Transmeta-defined flags: level 0x80860001 */
 	/* Transmeta-defined flags: level 0x80860001 */
 	xlvl = cpuid_eax(0x80860000);
 	xlvl = cpuid_eax(0x80860000);
@@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c)
 	}
 	}
 }
 }
 
 
-static struct cpu_dev transmeta_cpu_dev __initdata = {
+static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
 	.c_vendor	= "Transmeta",
 	.c_vendor	= "Transmeta",
 	.c_ident	= { "GenuineTMx86", "TransmetaCPU" },
 	.c_ident	= { "GenuineTMx86", "TransmetaCPU" },
 	.c_init		= init_transmeta,
 	.c_init		= init_transmeta,

+ 1 - 6
arch/i386/kernel/cpu/umc.c

@@ -5,12 +5,8 @@
 
 
 /* UMC chips appear to be only either 386 or 486, so no special init takes place.
 /* UMC chips appear to be only either 386 or 486, so no special init takes place.
  */
  */
-static void __init init_umc(struct cpuinfo_x86 * c)
-{
-
-}
 
 
-static struct cpu_dev umc_cpu_dev __initdata = {
+static struct cpu_dev umc_cpu_dev __cpuinitdata = {
 	.c_vendor	= "UMC",
 	.c_vendor	= "UMC",
 	.c_ident 	= { "UMC UMC UMC" },
 	.c_ident 	= { "UMC UMC UMC" },
 	.c_models = {
 	.c_models = {
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = {
 		  }
 		  }
 		},
 		},
 	},
 	},
-	.c_init		= init_umc,
 };
 };
 
 
 int __init umc_init_cpu(void)
 int __init umc_init_cpu(void)

+ 19 - 3
arch/i386/kernel/crash.c

@@ -22,6 +22,8 @@
 #include <asm/nmi.h>
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
+#include <asm/kdebug.h>
+
 #include <mach_ipi.h>
 #include <mach_ipi.h>
 
 
 
 
@@ -93,16 +95,25 @@ static void crash_save_self(struct pt_regs *regs)
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
 static atomic_t waiting_for_crash_ipi;
 static atomic_t waiting_for_crash_ipi;
 
 
-static int crash_nmi_callback(struct pt_regs *regs, int cpu)
+static int crash_nmi_callback(struct notifier_block *self,
+			unsigned long val, void *data)
 {
 {
+	struct pt_regs *regs;
 	struct pt_regs fixed_regs;
 	struct pt_regs fixed_regs;
+	int cpu;
+
+	if (val != DIE_NMI_IPI)
+		return NOTIFY_OK;
+
+	regs = ((struct die_args *)data)->regs;
+	cpu = raw_smp_processor_id();
 
 
 	/* Don't do anything if this handler is invoked on crashing cpu.
 	/* Don't do anything if this handler is invoked on crashing cpu.
 	 * Otherwise, system will completely hang. Crashing cpu can get
 	 * Otherwise, system will completely hang. Crashing cpu can get
 	 * an NMI if system was initially booted with nmi_watchdog parameter.
 	 * an NMI if system was initially booted with nmi_watchdog parameter.
 	 */
 	 */
 	if (cpu == crashing_cpu)
 	if (cpu == crashing_cpu)
-		return 1;
+		return NOTIFY_STOP;
 	local_irq_disable();
 	local_irq_disable();
 
 
 	if (!user_mode_vm(regs)) {
 	if (!user_mode_vm(regs)) {
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void)
 	send_IPI_allbutself(NMI_VECTOR);
 	send_IPI_allbutself(NMI_VECTOR);
 }
 }
 
 
+static struct notifier_block crash_nmi_nb = {
+	.notifier_call = crash_nmi_callback,
+};
+
 static void nmi_shootdown_cpus(void)
 static void nmi_shootdown_cpus(void)
 {
 {
 	unsigned long msecs;
 	unsigned long msecs;
 
 
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
 	/* Would it be better to replace the trap vector here? */
 	/* Would it be better to replace the trap vector here? */
-	set_nmi_callback(crash_nmi_callback);
+	if (register_die_notifier(&crash_nmi_nb))
+		return;		/* return what? */
 	/* Ensure the new callback function is set before sending
 	/* Ensure the new callback function is set before sending
 	 * out the NMI
 	 * out the NMI
 	 */
 	 */

+ 74 - 36
arch/i386/kernel/entry.S

@@ -76,8 +76,15 @@ DF_MASK		= 0x00000400
 NT_MASK		= 0x00004000
 NT_MASK		= 0x00004000
 VM_MASK		= 0x00020000
 VM_MASK		= 0x00020000
 
 
+/* These are replaces for paravirtualization */
+#define DISABLE_INTERRUPTS		cli
+#define ENABLE_INTERRUPTS		sti
+#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
+#define INTERRUPT_RETURN		iret
+#define GET_CR0_INTO_EAX		movl %cr0, %eax
+
 #ifdef CONFIG_PREEMPT
 #ifdef CONFIG_PREEMPT
-#define preempt_stop		cli; TRACE_IRQS_OFF
+#define preempt_stop		DISABLE_INTERRUPTS; TRACE_IRQS_OFF
 #else
 #else
 #define preempt_stop
 #define preempt_stop
 #define resume_kernel		restore_nocheck
 #define resume_kernel		restore_nocheck
@@ -176,18 +183,21 @@ VM_MASK		= 0x00020000
 
 
 #define RING0_INT_FRAME \
 #define RING0_INT_FRAME \
 	CFI_STARTPROC simple;\
 	CFI_STARTPROC simple;\
+	CFI_SIGNAL_FRAME;\
 	CFI_DEF_CFA esp, 3*4;\
 	CFI_DEF_CFA esp, 3*4;\
 	/*CFI_OFFSET cs, -2*4;*/\
 	/*CFI_OFFSET cs, -2*4;*/\
 	CFI_OFFSET eip, -3*4
 	CFI_OFFSET eip, -3*4
 
 
 #define RING0_EC_FRAME \
 #define RING0_EC_FRAME \
 	CFI_STARTPROC simple;\
 	CFI_STARTPROC simple;\
+	CFI_SIGNAL_FRAME;\
 	CFI_DEF_CFA esp, 4*4;\
 	CFI_DEF_CFA esp, 4*4;\
 	/*CFI_OFFSET cs, -2*4;*/\
 	/*CFI_OFFSET cs, -2*4;*/\
 	CFI_OFFSET eip, -3*4
 	CFI_OFFSET eip, -3*4
 
 
 #define RING0_PTREGS_FRAME \
 #define RING0_PTREGS_FRAME \
 	CFI_STARTPROC simple;\
 	CFI_STARTPROC simple;\
+	CFI_SIGNAL_FRAME;\
 	CFI_DEF_CFA esp, OLDESP-EBX;\
 	CFI_DEF_CFA esp, OLDESP-EBX;\
 	/*CFI_OFFSET cs, CS-OLDESP;*/\
 	/*CFI_OFFSET cs, CS-OLDESP;*/\
 	CFI_OFFSET eip, EIP-OLDESP;\
 	CFI_OFFSET eip, EIP-OLDESP;\
@@ -233,10 +243,11 @@ ret_from_intr:
 check_userspace:
 check_userspace:
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movl EFLAGS(%esp), %eax		# mix EFLAGS and CS
 	movb CS(%esp), %al
 	movb CS(%esp), %al
-	testl $(VM_MASK | 3), %eax
-	jz resume_kernel
+	andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+	cmpl $USER_RPL, %eax
+	jb resume_kernel		# not returning to v8086 or userspace
 ENTRY(resume_userspace)
 ENTRY(resume_userspace)
- 	cli				# make sure we don't miss an interrupt
+ 	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# setting need_resched or sigpending
 					# between sampling and the iret
 					# between sampling and the iret
 	movl TI_flags(%ebp), %ecx
 	movl TI_flags(%ebp), %ecx
@@ -247,7 +258,7 @@ ENTRY(resume_userspace)
 
 
 #ifdef CONFIG_PREEMPT
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 ENTRY(resume_kernel)
-	cli
+	DISABLE_INTERRUPTS
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
 	jnz restore_nocheck
 	jnz restore_nocheck
 need_resched:
 need_resched:
@@ -267,6 +278,7 @@ need_resched:
 	# sysenter call handler stub
 	# sysenter call handler stub
 ENTRY(sysenter_entry)
 ENTRY(sysenter_entry)
 	CFI_STARTPROC simple
 	CFI_STARTPROC simple
+	CFI_SIGNAL_FRAME
 	CFI_DEF_CFA esp, 0
 	CFI_DEF_CFA esp, 0
 	CFI_REGISTER esp, ebp
 	CFI_REGISTER esp, ebp
 	movl TSS_sysenter_esp0(%esp),%esp
 	movl TSS_sysenter_esp0(%esp),%esp
@@ -275,7 +287,7 @@ sysenter_past_esp:
 	 * No need to follow this irqs on/off section: the syscall
 	 * No need to follow this irqs on/off section: the syscall
 	 * disabled irqs and here we enable it straight after entry:
 	 * disabled irqs and here we enable it straight after entry:
 	 */
 	 */
-	sti
+	ENABLE_INTERRUPTS
 	pushl $(__USER_DS)
 	pushl $(__USER_DS)
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	/*CFI_REL_OFFSET ss, 0*/
 	/*CFI_REL_OFFSET ss, 0*/
@@ -320,7 +332,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)
 	movl %eax,EAX(%esp)
-	cli
+	DISABLE_INTERRUPTS
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
 	movl TI_flags(%ebp), %ecx
 	testw $_TIF_ALLWORK_MASK, %cx
 	testw $_TIF_ALLWORK_MASK, %cx
@@ -330,8 +342,7 @@ sysenter_past_esp:
 	movl OLDESP(%esp), %ecx
 	movl OLDESP(%esp), %ecx
 	xorl %ebp,%ebp
 	xorl %ebp,%ebp
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
-	sysexit
+	ENABLE_INTERRUPTS_SYSEXIT
 	CFI_ENDPROC
 	CFI_ENDPROC
 
 
 
 
@@ -356,7 +367,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	call *sys_call_table(,%eax,4)
 	movl %eax,EAX(%esp)		# store the return value
 	movl %eax,EAX(%esp)		# store the return value
 syscall_exit:
 syscall_exit:
-	cli				# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# setting need_resched or sigpending
 					# between sampling and the iret
 					# between sampling and the iret
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
@@ -371,8 +382,8 @@ restore_all:
 	# See comments in process.c:copy_thread() for details.
 	# See comments in process.c:copy_thread() for details.
 	movb OLDSS(%esp), %ah
 	movb OLDSS(%esp), %ah
 	movb CS(%esp), %al
 	movb CS(%esp), %al
-	andl $(VM_MASK | (4 << 8) | 3), %eax
-	cmpl $((4 << 8) | 3), %eax
+	andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+	cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 	CFI_REMEMBER_STATE
 	CFI_REMEMBER_STATE
 	je ldt_ss			# returning to user-space with LDT SS
 	je ldt_ss			# returning to user-space with LDT SS
 restore_nocheck:
 restore_nocheck:
@@ -381,11 +392,11 @@ restore_nocheck_notrace:
 	RESTORE_REGS
 	RESTORE_REGS
 	addl $4, %esp
 	addl $4, %esp
 	CFI_ADJUST_CFA_OFFSET -4
 	CFI_ADJUST_CFA_OFFSET -4
-1:	iret
+1:	INTERRUPT_RETURN
 .section .fixup,"ax"
 .section .fixup,"ax"
 iret_exc:
 iret_exc:
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti
+	ENABLE_INTERRUPTS
 	pushl $0			# no error code
 	pushl $0			# no error code
 	pushl $do_iret_error
 	pushl $do_iret_error
 	jmp error_code
 	jmp error_code
@@ -409,7 +420,7 @@ ldt_ss:
 	 * dosemu and wine happy. */
 	 * dosemu and wine happy. */
 	subl $8, %esp		# reserve space for switch16 pointer
 	subl $8, %esp		# reserve space for switch16 pointer
 	CFI_ADJUST_CFA_OFFSET 8
 	CFI_ADJUST_CFA_OFFSET 8
-	cli
+	DISABLE_INTERRUPTS
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
 	movl %esp, %eax
 	movl %esp, %eax
 	/* Set up the 16bit stack frame with switch32 pointer on top,
 	/* Set up the 16bit stack frame with switch32 pointer on top,
@@ -419,7 +430,7 @@ ldt_ss:
 	TRACE_IRQS_IRET
 	TRACE_IRQS_IRET
 	RESTORE_REGS
 	RESTORE_REGS
 	lss 20+4(%esp), %esp	# switch to 16bit stack
 	lss 20+4(%esp), %esp	# switch to 16bit stack
-1:	iret
+1:	INTERRUPT_RETURN
 .section __ex_table,"a"
 .section __ex_table,"a"
 	.align 4
 	.align 4
 	.long 1b,iret_exc
 	.long 1b,iret_exc
@@ -434,7 +445,7 @@ work_pending:
 	jz work_notifysig
 	jz work_notifysig
 work_resched:
 work_resched:
 	call schedule
 	call schedule
-	cli				# make sure we don't miss an interrupt
+	DISABLE_INTERRUPTS		# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# setting need_resched or sigpending
 					# between sampling and the iret
 					# between sampling and the iret
 	TRACE_IRQS_OFF
 	TRACE_IRQS_OFF
@@ -490,7 +501,7 @@ syscall_exit_work:
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
 	jz work_pending
 	jz work_pending
 	TRACE_IRQS_ON
 	TRACE_IRQS_ON
-	sti				# could let do_syscall_trace() call
+	ENABLE_INTERRUPTS		# could let do_syscall_trace() call
 					# schedule() instead
 					# schedule() instead
 	movl %esp, %eax
 	movl %esp, %eax
 	movl $1, %edx
 	movl $1, %edx
@@ -591,11 +602,9 @@ ENTRY(name)				\
 /* The include is where all of the SMP etc. interrupts come from */
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 #include "entry_arch.h"
 
 
-ENTRY(divide_error)
-	RING0_INT_FRAME
-	pushl $0			# no error code
-	CFI_ADJUST_CFA_OFFSET 4
-	pushl $do_divide_error
+KPROBE_ENTRY(page_fault)
+	RING0_EC_FRAME
+	pushl $do_page_fault
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	ALIGN
 	ALIGN
 error_code:
 error_code:
@@ -645,6 +654,7 @@ error_code:
 	call *%edi
 	call *%edi
 	jmp ret_from_exception
 	jmp ret_from_exception
 	CFI_ENDPROC
 	CFI_ENDPROC
+KPROBE_END(page_fault)
 
 
 ENTRY(coprocessor_error)
 ENTRY(coprocessor_error)
 	RING0_INT_FRAME
 	RING0_INT_FRAME
@@ -669,7 +679,7 @@ ENTRY(device_not_available)
 	pushl $-1			# mark this as an int
 	pushl $-1			# mark this as an int
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
-	movl %cr0, %eax
+	GET_CR0_INTO_EAX
 	testl $0x4, %eax		# EM (math emulation bit)
 	testl $0x4, %eax		# EM (math emulation bit)
 	jne device_not_available_emulate
 	jne device_not_available_emulate
 	preempt_stop
 	preempt_stop
@@ -702,9 +712,15 @@ device_not_available_emulate:
 	jne ok;					\
 	jne ok;					\
 label:						\
 label:						\
 	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
 	movl TSS_sysenter_esp0+offset(%esp),%esp;	\
+	CFI_DEF_CFA esp, 0;			\
+	CFI_UNDEFINED eip;			\
 	pushfl;					\
 	pushfl;					\
+	CFI_ADJUST_CFA_OFFSET 4;		\
 	pushl $__KERNEL_CS;			\
 	pushl $__KERNEL_CS;			\
-	pushl $sysenter_past_esp
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	pushl $sysenter_past_esp;		\
+	CFI_ADJUST_CFA_OFFSET 4;		\
+	CFI_REL_OFFSET eip, 0
 
 
 KPROBE_ENTRY(debug)
 KPROBE_ENTRY(debug)
 	RING0_INT_FRAME
 	RING0_INT_FRAME
@@ -720,7 +736,8 @@ debug_stack_correct:
 	call do_debug
 	call do_debug
 	jmp ret_from_exception
 	jmp ret_from_exception
 	CFI_ENDPROC
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(debug)
+
 /*
 /*
  * NMI is doubly nasty. It can happen _while_ we're handling
  * NMI is doubly nasty. It can happen _while_ we're handling
  * a debug fault, and the debug fault hasn't yet been able to
  * a debug fault, and the debug fault hasn't yet been able to
@@ -729,7 +746,7 @@ debug_stack_correct:
  * check whether we got an NMI on the debug path where the debug
  * check whether we got an NMI on the debug path where the debug
  * fault happened on the sysenter path.
  * fault happened on the sysenter path.
  */
  */
-ENTRY(nmi)
+KPROBE_ENTRY(nmi)
 	RING0_INT_FRAME
 	RING0_INT_FRAME
 	pushl %eax
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
@@ -754,6 +771,7 @@ ENTRY(nmi)
 	cmpl $sysenter_entry,12(%esp)
 	cmpl $sysenter_entry,12(%esp)
 	je nmi_debug_stack_check
 	je nmi_debug_stack_check
 nmi_stack_correct:
 nmi_stack_correct:
+	/* We have a RING0_INT_FRAME here */
 	pushl %eax
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	SAVE_ALL
 	SAVE_ALL
@@ -764,9 +782,12 @@ nmi_stack_correct:
 	CFI_ENDPROC
 	CFI_ENDPROC
 
 
 nmi_stack_fixup:
 nmi_stack_fixup:
+	RING0_INT_FRAME
 	FIX_STACK(12,nmi_stack_correct, 1)
 	FIX_STACK(12,nmi_stack_correct, 1)
 	jmp nmi_stack_correct
 	jmp nmi_stack_correct
+
 nmi_debug_stack_check:
 nmi_debug_stack_check:
+	/* We have a RING0_INT_FRAME here */
 	cmpw $__KERNEL_CS,16(%esp)
 	cmpw $__KERNEL_CS,16(%esp)
 	jne nmi_stack_correct
 	jne nmi_stack_correct
 	cmpl $debug,(%esp)
 	cmpl $debug,(%esp)
@@ -777,8 +798,10 @@ nmi_debug_stack_check:
 	jmp nmi_stack_correct
 	jmp nmi_stack_correct
 
 
 nmi_16bit_stack:
 nmi_16bit_stack:
-	RING0_INT_FRAME
-	/* create the pointer to lss back */
+	/* We have a RING0_INT_FRAME here.
+	 *
+	 * create the pointer to lss back
+	 */
 	pushl %ss
 	pushl %ss
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %esp
 	pushl %esp
@@ -799,12 +822,13 @@ nmi_16bit_stack:
 	call do_nmi
 	call do_nmi
 	RESTORE_REGS
 	RESTORE_REGS
 	lss 12+4(%esp), %esp		# back to 16bit stack
 	lss 12+4(%esp), %esp		# back to 16bit stack
-1:	iret
+1:	INTERRUPT_RETURN
 	CFI_ENDPROC
 	CFI_ENDPROC
 .section __ex_table,"a"
 .section __ex_table,"a"
 	.align 4
 	.align 4
 	.long 1b,iret_exc
 	.long 1b,iret_exc
 .previous
 .previous
+KPROBE_END(nmi)
 
 
 KPROBE_ENTRY(int3)
 KPROBE_ENTRY(int3)
 	RING0_INT_FRAME
 	RING0_INT_FRAME
@@ -816,7 +840,7 @@ KPROBE_ENTRY(int3)
 	call do_int3
 	call do_int3
 	jmp ret_from_exception
 	jmp ret_from_exception
 	CFI_ENDPROC
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(int3)
 
 
 ENTRY(overflow)
 ENTRY(overflow)
 	RING0_INT_FRAME
 	RING0_INT_FRAME
@@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection)
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
 	CFI_ENDPROC
 	CFI_ENDPROC
-	.previous .text
+KPROBE_END(general_protection)
 
 
 ENTRY(alignment_check)
 ENTRY(alignment_check)
 	RING0_EC_FRAME
 	RING0_EC_FRAME
@@ -890,13 +914,14 @@ ENTRY(alignment_check)
 	jmp error_code
 	jmp error_code
 	CFI_ENDPROC
 	CFI_ENDPROC
 
 
-KPROBE_ENTRY(page_fault)
-	RING0_EC_FRAME
-	pushl $do_page_fault
+ENTRY(divide_error)
+	RING0_INT_FRAME
+	pushl $0			# no error code
+	CFI_ADJUST_CFA_OFFSET 4
+	pushl $do_divide_error
 	CFI_ADJUST_CFA_OFFSET 4
 	CFI_ADJUST_CFA_OFFSET 4
 	jmp error_code
 	jmp error_code
 	CFI_ENDPROC
 	CFI_ENDPROC
-	.previous .text
 
 
 #ifdef CONFIG_X86_MCE
 #ifdef CONFIG_X86_MCE
 ENTRY(machine_check)
 ENTRY(machine_check)
@@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running)
 ENDPROC(arch_unwind_init_running)
 ENDPROC(arch_unwind_init_running)
 #endif
 #endif
 
 
+ENTRY(kernel_thread_helper)
+	pushl $0		# fake return address for unwinder
+	CFI_STARTPROC
+	movl %edx,%eax
+	push %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	call *%ebx
+	push %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	call do_exit
+	CFI_ENDPROC
+ENDPROC(kernel_thread_helper)
+
 .section .rodata,"a"
 .section .rodata,"a"
 #include "syscall_table.S"
 #include "syscall_table.S"
 
 

+ 67 - 0
arch/i386/kernel/head.S

@@ -371,8 +371,65 @@ rp_sidt:
 	addl $8,%edi
 	addl $8,%edi
 	dec %ecx
 	dec %ecx
 	jne rp_sidt
 	jne rp_sidt
+
+.macro	set_early_handler handler,trapno
+	lea \handler,%edx
+	movl $(__KERNEL_CS << 16),%eax
+	movw %dx,%ax
+	movw $0x8E00,%dx	/* interrupt gate - dpl=0, present */
+	lea idt_table,%edi
+	movl %eax,8*\trapno(%edi)
+	movl %edx,8*\trapno+4(%edi)
+.endm
+
+	set_early_handler handler=early_divide_err,trapno=0
+	set_early_handler handler=early_illegal_opcode,trapno=6
+	set_early_handler handler=early_protection_fault,trapno=13
+	set_early_handler handler=early_page_fault,trapno=14
+
 	ret
 	ret
 
 
+early_divide_err:
+	xor %edx,%edx
+	pushl $0	/* fake errcode */
+	jmp early_fault
+
+early_illegal_opcode:
+	movl $6,%edx
+	pushl $0	/* fake errcode */
+	jmp early_fault
+
+early_protection_fault:
+	movl $13,%edx
+	jmp early_fault
+
+early_page_fault:
+	movl $14,%edx
+	jmp early_fault
+
+early_fault:
+	cld
+#ifdef CONFIG_PRINTK
+	movl $(__KERNEL_DS),%eax
+	movl %eax,%ds
+	movl %eax,%es
+	cmpl $2,early_recursion_flag
+	je hlt_loop
+	incl early_recursion_flag
+	movl %cr2,%eax
+	pushl %eax
+	pushl %edx		/* trapno */
+	pushl $fault_msg
+#ifdef CONFIG_EARLY_PRINTK
+	call early_printk
+#else
+	call printk
+#endif
+#endif
+hlt_loop:
+	hlt
+	jmp hlt_loop
+
 /* This is the default interrupt "handler" :-) */
 /* This is the default interrupt "handler" :-) */
 	ALIGN
 	ALIGN
 ignore_int:
 ignore_int:
@@ -386,6 +443,9 @@ ignore_int:
 	movl $(__KERNEL_DS),%eax
 	movl $(__KERNEL_DS),%eax
 	movl %eax,%ds
 	movl %eax,%ds
 	movl %eax,%es
 	movl %eax,%es
+	cmpl $2,early_recursion_flag
+	je hlt_loop
+	incl early_recursion_flag
 	pushl 16(%esp)
 	pushl 16(%esp)
 	pushl 24(%esp)
 	pushl 24(%esp)
 	pushl 32(%esp)
 	pushl 32(%esp)
@@ -431,9 +491,16 @@ ENTRY(stack_start)
 
 
 ready:	.byte 0
 ready:	.byte 0
 
 
+early_recursion_flag:
+	.long 0
+
 int_msg:
 int_msg:
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 	.asciz "Unknown interrupt or fault at EIP %p %p %p\n"
 
 
+fault_msg:
+	.ascii "Int %d: CR2 %p  err %p  EIP %p  CS %p  flags %p\n"
+	.asciz "Stack: %p %p %p %p %p %p %p %p\n"
+
 /*
 /*
  * The IDT and GDT 'descriptors' are a strange 48-bit object
  * The IDT and GDT 'descriptors' are a strange 48-bit object
  * only used by the lidt and lgdt instructions. They are not
  * only used by the lidt and lgdt instructions. They are not

+ 5 - 1
arch/i386/kernel/i8259.c

@@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq)
 
 
 #define shutdown_8259A_irq	disable_8259A_irq
 #define shutdown_8259A_irq	disable_8259A_irq
 
 
+static int i8259A_auto_eoi;
+
 static void mask_and_ack_8259A(unsigned int);
 static void mask_and_ack_8259A(unsigned int);
 
 
 unsigned int startup_8259A_irq(unsigned int irq)
 unsigned int startup_8259A_irq(unsigned int irq)
@@ -253,7 +255,7 @@ static void save_ELCR(char *trigger)
 
 
 static int i8259A_resume(struct sys_device *dev)
 static int i8259A_resume(struct sys_device *dev)
 {
 {
-	init_8259A(0);
+	init_8259A(i8259A_auto_eoi);
 	restore_ELCR(irq_trigger);
 	restore_ELCR(irq_trigger);
 	return 0;
 	return 0;
 }
 }
@@ -301,6 +303,8 @@ void init_8259A(int auto_eoi)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 
 
+	i8259A_auto_eoi = auto_eoi;
+
 	spin_lock_irqsave(&i8259A_lock, flags);
 	spin_lock_irqsave(&i8259A_lock, flags);
 
 
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */

+ 67 - 58
arch/i386/kernel/io_apic.c

@@ -40,6 +40,7 @@
 #include <asm/nmi.h>
 #include <asm/nmi.h>
 
 
 #include <mach_apic.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 
 
 #include "io_ports.h"
 #include "io_ports.h"
 
 
@@ -65,7 +66,7 @@ int sis_apic_bug = -1;
  */
  */
 int nr_ioapic_registers[MAX_IO_APICS];
 int nr_ioapic_registers[MAX_IO_APICS];
 
 
-int disable_timer_pin_1 __initdata;
+static int disable_timer_pin_1 __initdata;
 
 
 /*
 /*
  * Rough estimation of how many shared IRQs there are, can
  * Rough estimation of how many shared IRQs there are, can
@@ -93,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 #define vector_to_irq(vector)	(vector)
 #define vector_to_irq(vector)	(vector)
 #endif
 #endif
 
 
+
+union entry_union {
+	struct { u32 w1, w2; };
+	struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+	unsigned long flags;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+	return eu.entry;
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	unsigned long flags;
+	union entry_union eu;
+	eu.entry = e;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 /*
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -200,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq)
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 {
 {
 	struct IO_APIC_route_entry entry;
 	struct IO_APIC_route_entry entry;
-	unsigned long flags;
 	
 	
 	/* Check delivery_mode to be sure we're not clearing an SMI pin */
 	/* Check delivery_mode to be sure we're not clearing an SMI pin */
-	spin_lock_irqsave(&ioapic_lock, flags);
-	*(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
-	*(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	entry = ioapic_read_entry(apic, pin);
 	if (entry.delivery_mode == dest_SMI)
 	if (entry.delivery_mode == dest_SMI)
 		return;
 		return;
 
 
@@ -215,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	 */
 	 */
 	memset(&entry, 0, sizeof(entry));
 	memset(&entry, 0, sizeof(entry));
 	entry.mask = 1;
 	entry.mask = 1;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
-	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry);
 }
 }
 
 
 static void clear_IO_APIC (void)
 static void clear_IO_APIC (void)
@@ -1283,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void)
 			if (!apic && (irq < 16))
 			if (!apic && (irq < 16))
 				disable_8259A_irq(irq);
 				disable_8259A_irq(irq);
 		}
 		}
+		ioapic_write_entry(apic, pin, entry);
 		spin_lock_irqsave(&ioapic_lock, flags);
 		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
-		io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
 		set_native_irq_info(irq, TARGET_CPUS);
 		set_native_irq_info(irq, TARGET_CPUS);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 	}
 	}
@@ -1301,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void)
 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
 {
 {
 	struct IO_APIC_route_entry entry;
 	struct IO_APIC_route_entry entry;
-	unsigned long flags;
 
 
 	memset(&entry,0,sizeof(entry));
 	memset(&entry,0,sizeof(entry));
 
 
@@ -1331,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
 	/*
 	/*
 	 * Add it to the IO-APIC irq-routing table:
 	 * Add it to the IO-APIC irq-routing table:
 	 */
 	 */
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
-	io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry);
 
 
 	enable_8259A_irq(0);
 	enable_8259A_irq(0);
 }
 }
@@ -1444,10 +1461,7 @@ void __init print_IO_APIC(void)
 	for (i = 0; i <= reg_01.bits.entries; i++) {
 	for (i = 0; i <= reg_01.bits.entries; i++) {
 		struct IO_APIC_route_entry entry;
 		struct IO_APIC_route_entry entry;
 
 
-		spin_lock_irqsave(&ioapic_lock, flags);
-		*(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
-		*(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
-		spin_unlock_irqrestore(&ioapic_lock, flags);
+		entry = ioapic_read_entry(apic, i);
 
 
 		printk(KERN_DEBUG " %02x %03X %02X  ",
 		printk(KERN_DEBUG " %02x %03X %02X  ",
 			i,
 			i,
@@ -1666,10 +1680,7 @@ static void __init enable_IO_APIC(void)
 		/* See if any of the pins is in ExtINT mode */
 		/* See if any of the pins is in ExtINT mode */
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
 			struct IO_APIC_route_entry entry;
 			struct IO_APIC_route_entry entry;
-			spin_lock_irqsave(&ioapic_lock, flags);
-			*(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
-			*(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
-			spin_unlock_irqrestore(&ioapic_lock, flags);
+			entry = ioapic_read_entry(apic, pin);
 
 
 
 
 			/* If the interrupt line is enabled and in ExtInt mode
 			/* If the interrupt line is enabled and in ExtInt mode
@@ -1726,7 +1737,6 @@ void disable_IO_APIC(void)
 	 */
 	 */
 	if (ioapic_i8259.pin != -1) {
 	if (ioapic_i8259.pin != -1) {
 		struct IO_APIC_route_entry entry;
 		struct IO_APIC_route_entry entry;
-		unsigned long flags;
 
 
 		memset(&entry, 0, sizeof(entry));
 		memset(&entry, 0, sizeof(entry));
 		entry.mask            = 0; /* Enabled */
 		entry.mask            = 0; /* Enabled */
@@ -1743,12 +1753,7 @@ void disable_IO_APIC(void)
 		/*
 		/*
 		 * Add it to the IO-APIC irq-routing table:
 		 * Add it to the IO-APIC irq-routing table:
 		 */
 		 */
-		spin_lock_irqsave(&ioapic_lock, flags);
-		io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
-			*(((int *)&entry)+1));
-		io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
-			*(((int *)&entry)+0));
-		spin_unlock_irqrestore(&ioapic_lock, flags);
+		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
 	}
 	}
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 	disconnect_bsp_APIC(ioapic_i8259.pin != -1);
 }
 }
@@ -2213,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void)
 	int apic, pin, i;
 	int apic, pin, i;
 	struct IO_APIC_route_entry entry0, entry1;
 	struct IO_APIC_route_entry entry0, entry1;
 	unsigned char save_control, save_freq_select;
 	unsigned char save_control, save_freq_select;
-	unsigned long flags;
 
 
 	pin  = find_isa_irq_pin(8, mp_INT);
 	pin  = find_isa_irq_pin(8, mp_INT);
 	apic = find_isa_irq_apic(8, mp_INT);
 	apic = find_isa_irq_apic(8, mp_INT);
 	if (pin == -1)
 	if (pin == -1)
 		return;
 		return;
 
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	*(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
-	*(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	entry0 = ioapic_read_entry(apic, pin);
 	clear_IO_APIC_pin(apic, pin);
 	clear_IO_APIC_pin(apic, pin);
 
 
 	memset(&entry1, 0, sizeof(entry1));
 	memset(&entry1, 0, sizeof(entry1));
@@ -2236,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void)
 	entry1.trigger = 0;
 	entry1.trigger = 0;
 	entry1.vector = 0;
 	entry1.vector = 0;
 
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
-	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry1);
 
 
 	save_control = CMOS_READ(RTC_CONTROL);
 	save_control = CMOS_READ(RTC_CONTROL);
 	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
 	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2258,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void)
 	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
 	clear_IO_APIC_pin(apic, pin);
 	clear_IO_APIC_pin(apic, pin);
 
 
-	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
-	io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	ioapic_write_entry(apic, pin, entry0);
 }
 }
 
 
 int timer_uses_ioapic_pin_0;
 int timer_uses_ioapic_pin_0;
@@ -2461,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 {
 	struct IO_APIC_route_entry *entry;
 	struct IO_APIC_route_entry *entry;
 	struct sysfs_ioapic_data *data;
 	struct sysfs_ioapic_data *data;
-	unsigned long flags;
 	int i;
 	int i;
 	
 	
 	data = container_of(dev, struct sysfs_ioapic_data, dev);
 	data = container_of(dev, struct sysfs_ioapic_data, dev);
 	entry = data->entry;
 	entry = data->entry;
-	spin_lock_irqsave(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
-		*(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
-		*(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+		entry[i] = ioapic_read_entry(dev->id, i);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -2493,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev)
 		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 		reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
 		io_apic_write(dev->id, 0, reg_00.raw);
 		io_apic_write(dev->id, 0, reg_00.raw);
 	}
 	}
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
-		io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
-		io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
-	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
+	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+		ioapic_write_entry(dev->id, i, entry[i]);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -2694,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	if (!ioapic && (irq < 16))
 	if (!ioapic && (irq < 16))
 		disable_8259A_irq(irq);
 		disable_8259A_irq(irq);
 
 
+	ioapic_write_entry(ioapic, pin, entry);
 	spin_lock_irqsave(&ioapic_lock, flags);
 	spin_lock_irqsave(&ioapic_lock, flags);
-	io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
-	io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
 	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
 	set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
 
@@ -2704,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 }
 }
 
 
 #endif /* CONFIG_ACPI */
 #endif /* CONFIG_ACPI */
+
+static int __init parse_disable_timer_pin_1(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
+
+static int __init parse_enable_timer_pin_1(char *arg)
+{
+	disable_timer_pin_1 = -1;
+	return 0;
+}
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
+
+static int __init parse_noapic(char *arg)
+{
+	/* disable IO-APIC */
+	disable_ioapic_setup();
+	return 0;
+}
+early_param("noapic", parse_noapic);

+ 50 - 90
arch/i386/kernel/machine_kexec.c

@@ -9,6 +9,7 @@
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/kexec.h>
 #include <linux/kexec.h>
 #include <linux/delay.h>
 #include <linux/delay.h>
+#include <linux/init.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/tlbflush.h>
@@ -20,70 +21,13 @@
 #include <asm/system.h>
 #include <asm/system.h>
 
 
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
 #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L2_ATTR (_PAGE_PRESENT)
-
-#define LEVEL0_SIZE (1UL << 12UL)
-
-#ifndef CONFIG_X86_PAE
-#define LEVEL1_SIZE (1UL << 22UL)
-static u32 pgtable_level1[1024] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index;
-	u32 *pgtable_level2;
-
-	/* Find the current page table */
-	pgtable_level2 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = address / LEVEL1_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level2);
-}
-
-#else
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-static u64 pgtable_level1[512] PAGE_ALIGNED;
-static u64 pgtable_level2[512] PAGE_ALIGNED;
-
-static void identity_map_page(unsigned long address)
-{
-	unsigned long level1_index, level2_index, level3_index;
-	u64 *pgtable_level3;
-
-	/* Find the current page table */
-	pgtable_level3 = __va(read_cr3());
-
-	/* Find the indexes of the physical address to identity map */
-	level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
-	level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
-	level3_index = address / LEVEL2_SIZE;
-
-	/* Identity map the page table entry */
-	pgtable_level1[level1_index] = address | L0_ATTR;
-	pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
-	set_64bit(&pgtable_level3[level3_index],
-					       __pa(pgtable_level2) | L2_ATTR);
-
-	/* Flush the tlb so the new mapping takes effect.
-	 * Global tlb entries are not flushed but that is not an issue.
-	 */
-	load_cr3(pgtable_level3);
-}
+static u32 kexec_pgd[1024] PAGE_ALIGNED;
+#ifdef CONFIG_X86_PAE
+static u32 kexec_pmd0[1024] PAGE_ALIGNED;
+static u32 kexec_pmd1[1024] PAGE_ALIGNED;
 #endif
 #endif
+static u32 kexec_pte0[1024] PAGE_ALIGNED;
+static u32 kexec_pte1[1024] PAGE_ALIGNED;
 
 
 static void set_idt(void *newidt, __u16 limit)
 static void set_idt(void *newidt, __u16 limit)
 {
 {
@@ -127,16 +71,6 @@ static void load_segments(void)
 #undef __STR
 #undef __STR
 }
 }
 
 
-typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
-					unsigned long indirection_page,
-					unsigned long reboot_code_buffer,
-					unsigned long start_address,
-					unsigned int has_pae) ATTRIB_NORET;
-
-extern const unsigned char relocate_new_kernel[];
-extern void relocate_new_kernel_end(void);
-extern const unsigned int relocate_new_kernel_size;
-
 /*
 /*
  * A architecture hook called to validate the
  * A architecture hook called to validate the
  * proposed image and prepare the control pages
  * proposed image and prepare the control pages
@@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image)
  */
  */
 NORET_TYPE void machine_kexec(struct kimage *image)
 NORET_TYPE void machine_kexec(struct kimage *image)
 {
 {
-	unsigned long page_list;
-	unsigned long reboot_code_buffer;
-
-	relocate_new_kernel_t rnk;
+	unsigned long page_list[PAGES_NR];
+	void *control_page;
 
 
 	/* Interrupts aren't acceptable while we reboot */
 	/* Interrupts aren't acceptable while we reboot */
 	local_irq_disable();
 	local_irq_disable();
 
 
-	/* Compute some offsets */
-	reboot_code_buffer = page_to_pfn(image->control_code_page)
-								<< PAGE_SHIFT;
-	page_list = image->head;
-
-	/* Set up an identity mapping for the reboot_code_buffer */
-	identity_map_page(reboot_code_buffer);
-
-	/* copy it out */
-	memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-						relocate_new_kernel_size);
+	control_page = page_address(image->control_code_page);
+	memcpy(control_page, relocate_kernel, PAGE_SIZE);
+
+	page_list[PA_CONTROL_PAGE] = __pa(control_page);
+	page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
+	page_list[PA_PGD] = __pa(kexec_pgd);
+	page_list[VA_PGD] = (unsigned long)kexec_pgd;
+#ifdef CONFIG_X86_PAE
+	page_list[PA_PMD_0] = __pa(kexec_pmd0);
+	page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
+	page_list[PA_PMD_1] = __pa(kexec_pmd1);
+	page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
+#endif
+	page_list[PA_PTE_0] = __pa(kexec_pte0);
+	page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
+	page_list[PA_PTE_1] = __pa(kexec_pte1);
+	page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
 
 
 	/* The segment registers are funny things, they have both a
 	/* The segment registers are funny things, they have both a
 	 * visible and an invisible part.  Whenever the visible part is
 	 * visible and an invisible part.  Whenever the visible part is
@@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	set_idt(phys_to_virt(0),0);
 	set_idt(phys_to_virt(0),0);
 
 
 	/* now call it */
 	/* now call it */
-	rnk = (relocate_new_kernel_t) reboot_code_buffer;
-	(*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae);
+	relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
+			image->start, cpu_has_pae);
+}
+
+/* crashkernel=size@addr specifies the location to reserve for
+ * a crash kernel.  By reserving this memory we guarantee
+ * that linux never sets it up as a DMA target.
+ * Useful for holding code to do something appropriate
+ * after a kernel panic.
+ */
+static int __init parse_crashkernel(char *arg)
+{
+	unsigned long size, base;
+	size = memparse(arg, &arg);
+	if (*arg == '@') {
+		base = memparse(arg+1, &arg);
+		/* FIXME: Do I want a sanity check
+		 * to validate the memory range?
+		 */
+		crashk_res.start = base;
+		crashk_res.end   = base + size - 1;
+	}
+	return 0;
 }
 }
+early_param("crashkernel", parse_crashkernel);

+ 5 - 3
arch/i386/kernel/mca.c

@@ -42,6 +42,7 @@
 #include <linux/errno.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/mca.h>
 #include <linux/mca.h>
+#include <linux/kprobes.h>
 #include <asm/system.h>
 #include <asm/system.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <linux/proc_fs.h>
 #include <linux/proc_fs.h>
@@ -414,7 +415,8 @@ subsys_initcall(mca_init);
 
 
 /*--------------------------------------------------------------------*/
 /*--------------------------------------------------------------------*/
 
 
-static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
+static __kprobes void
+mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
 {
 {
 	int slot = mca_dev->slot;
 	int slot = mca_dev->slot;
 
 
@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
 
 
 /*--------------------------------------------------------------------*/
 /*--------------------------------------------------------------------*/
 
 
-static int mca_handle_nmi_callback(struct device *dev, void *data)
+static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
 {
 {
 	struct mca_device *mca_dev = to_mca_device(dev);
 	struct mca_device *mca_dev = to_mca_device(dev);
 	unsigned char pos5;
 	unsigned char pos5;
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data)
 	return 0;
 	return 0;
 }
 }
 
 
-void mca_handle_nmi(void)
+void __kprobes mca_handle_nmi(void)
 {
 {
 	/* First try - scan the various adapters and see if a specific
 	/* First try - scan the various adapters and see if a specific
 	 * adapter was responsible for the error.
 	 * adapter was responsible for the error.

+ 511 - 263
arch/i386/kernel/microcode.c

@@ -2,6 +2,7 @@
  *	Intel CPU Microcode Update Driver for Linux
  *	Intel CPU Microcode Update Driver for Linux
  *
  *
  *	Copyright (C) 2000-2004 Tigran Aivazian
  *	Copyright (C) 2000-2004 Tigran Aivazian
+ *		      2006	Shaohua Li <shaohua.li@intel.com>
  *
  *
  *	This driver allows to upgrade microcode on Intel processors
  *	This driver allows to upgrade microcode on Intel processors
  *	belonging to IA-32 family - PentiumPro, Pentium II, 
  *	belonging to IA-32 family - PentiumPro, Pentium II, 
@@ -82,6 +83,9 @@
 #include <linux/spinlock.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
 
 
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
@@ -91,9 +95,6 @@ MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
 MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
 
 
-static int verbose;
-module_param(verbose, int, 0644);
-
 #define MICROCODE_VERSION 	"1.14a"
 #define MICROCODE_VERSION 	"1.14a"
 
 
 #define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
 #define DEFAULT_UCODE_DATASIZE 	(2000) 	  /* 2000 bytes */
@@ -120,55 +121,40 @@ static DEFINE_SPINLOCK(microcode_update_lock);
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
 static DEFINE_MUTEX(microcode_mutex);
 static DEFINE_MUTEX(microcode_mutex);
 
 
-static void __user *user_buffer;	/* user area microcode data buffer */
-static unsigned int user_buffer_size;	/* it's size */
-
-typedef enum mc_error_code {
-	MC_SUCCESS 	= 0,
-	MC_IGNORED 	= 1,
-	MC_NOTFOUND 	= 2,
-	MC_MARKED 	= 3,
-	MC_ALLOCATED 	= 4,
-} mc_error_code_t;
-
 static struct ucode_cpu_info {
 static struct ucode_cpu_info {
+	int valid;
 	unsigned int sig;
 	unsigned int sig;
-	unsigned int pf, orig_pf;
+	unsigned int pf;
 	unsigned int rev;
 	unsigned int rev;
-	unsigned int cksum;
-	mc_error_code_t err;
 	microcode_t *mc;
 	microcode_t *mc;
 } ucode_cpu_info[NR_CPUS];
 } ucode_cpu_info[NR_CPUS];
-				
-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
 
 
-static void collect_cpu_info (void *unused)
+static void collect_cpu_info(int cpu_num)
 {
 {
-	int cpu_num = smp_processor_id();
 	struct cpuinfo_x86 *c = cpu_data + cpu_num;
 	struct cpuinfo_x86 *c = cpu_data + cpu_num;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	unsigned int val[2];
 	unsigned int val[2];
 
 
-	uci->sig = uci->pf = uci->rev = uci->cksum = 0;
-	uci->err = MC_NOTFOUND; 
+	/* We should bind the task to the CPU */
+	BUG_ON(raw_smp_processor_id() != cpu_num);
+	uci->pf = uci->rev = 0;
 	uci->mc = NULL;
 	uci->mc = NULL;
+	uci->valid = 1;
 
 
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
 	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
 	    	cpu_has(c, X86_FEATURE_IA64)) {
 	    	cpu_has(c, X86_FEATURE_IA64)) {
-		printk(KERN_ERR "microcode: CPU%d not a capable Intel processor\n", cpu_num);
+		printk(KERN_ERR "microcode: CPU%d not a capable Intel "
+			"processor\n", cpu_num);
+		uci->valid = 0;
 		return;
 		return;
-	} else {
-		uci->sig = cpuid_eax(0x00000001);
+	}
 
 
-		if ((c->x86_model >= 5) || (c->x86 > 6)) {
-			/* get processor flags from MSR 0x17 */
-			rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-			uci->pf = 1 << ((val[1] >> 18) & 7);
-		}
-		uci->orig_pf = uci->pf;
+	uci->sig = cpuid_eax(0x00000001);
+
+	if ((c->x86_model >= 5) || (c->x86 > 6)) {
+		/* get processor flags from MSR 0x17 */
+		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		uci->pf = 1 << ((val[1] >> 18) & 7);
 	}
 	}
 
 
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
 	wrmsr(MSR_IA32_UCODE_REV, 0, 0);
@@ -180,218 +166,159 @@ static void collect_cpu_info (void *unused)
 			uci->sig, uci->pf, uci->rev);
 			uci->sig, uci->pf, uci->rev);
 }
 }
 
 
-static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_header, int sig, int pf, int cksum)
+static inline int microcode_update_match(int cpu_num,
+	microcode_header_t *mc_header, int sig, int pf)
 {
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 
 
-	pr_debug("Microcode Found.\n");
-	pr_debug("   Header Revision 0x%x\n", mc_header->hdrver);
-	pr_debug("   Loader Revision 0x%x\n", mc_header->ldrver);
-	pr_debug("   Revision 0x%x \n", mc_header->rev);
-	pr_debug("   Date %x/%x/%x\n",
-		((mc_header->date >> 24 ) & 0xff),
-		((mc_header->date >> 16 ) & 0xff),
-		(mc_header->date & 0xFFFF));
-	pr_debug("   Signature 0x%x\n", sig);
-	pr_debug("   Type 0x%x Family 0x%x Model 0x%x Stepping 0x%x\n",
-		((sig >> 12) & 0x3),
-		((sig >> 8) & 0xf),
-		((sig >> 4) & 0xf),
-		((sig & 0xf)));
-	pr_debug("   Processor Flags 0x%x\n", pf);
-	pr_debug("   Checksum 0x%x\n", cksum);
-
-	if (mc_header->rev < uci->rev) {
-		if (uci->err == MC_NOTFOUND) {
-			uci->err = MC_IGNORED;
-			uci->cksum = mc_header->rev;
-		} else if (uci->err == MC_IGNORED && uci->cksum < mc_header->rev)
-			uci->cksum = mc_header->rev;
-	} else if (mc_header->rev == uci->rev) {
-		if (uci->err < MC_MARKED) {
-			/* notify the caller of success on this cpu */
-			uci->err = MC_SUCCESS;
-		}
-	} else if (uci->err != MC_ALLOCATED || mc_header->rev > uci->mc->hdr.rev) {
-		pr_debug("microcode: CPU%d found a matching microcode update with "
-			" revision 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev);
-		uci->cksum = cksum;
-		uci->pf = pf; /* keep the original mc pf for cksum calculation */
-		uci->err = MC_MARKED; /* found the match */
-		for_each_online_cpu(cpu_num) {
-			if (ucode_cpu_info + cpu_num != uci
-			    && ucode_cpu_info[cpu_num].mc == uci->mc) {
-				uci->mc = NULL;
-				break;
-			}
-		}
-		if (uci->mc != NULL) {
-			vfree(uci->mc);
-			uci->mc = NULL;
-		}
-	}
-	return;
+	if (!sigmatch(sig, uci->sig, pf, uci->pf)
+		|| mc_header->rev <= uci->rev)
+		return 0;
+	return 1;
 }
 }
 
 
-static int find_matching_ucodes (void) 
+static int microcode_sanity_check(void *mc)
 {
 {
-	int cursor = 0;
-	int error = 0;
-
-	while (cursor + MC_HEADER_SIZE < user_buffer_size) {
-		microcode_header_t mc_header;
-		void *newmc = NULL;
-		int i, sum, cpu_num, allocated_flag, total_size, data_size, ext_table_size;
+	microcode_header_t *mc_header = mc;
+	struct extended_sigtable *ext_header = NULL;
+	struct extended_signature *ext_sig;
+	unsigned long total_size, data_size, ext_table_size;
+	int sum, orig_sum, ext_sigcount = 0, i;
+
+	total_size = get_totalsize(mc_header);
+	data_size = get_datasize(mc_header);
+	if (data_size + MC_HEADER_SIZE > total_size) {
+		printk(KERN_ERR "microcode: error! "
+			"Bad data size in microcode data file\n");
+		return -EINVAL;
+	}
 
 
-		if (copy_from_user(&mc_header, user_buffer + cursor, MC_HEADER_SIZE)) {
-			printk(KERN_ERR "microcode: error! Can not read user data\n");
-			error = -EFAULT;
-			goto out;
+	if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
+		printk(KERN_ERR "microcode: error! "
+			"Unknown microcode update format\n");
+		return -EINVAL;
+	}
+	ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+	if (ext_table_size) {
+		if ((ext_table_size < EXT_HEADER_SIZE)
+		 || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+			printk(KERN_ERR "microcode: error! "
+				"Small exttable size in microcode data file\n");
+			return -EINVAL;
 		}
 		}
-
-		total_size = get_totalsize(&mc_header);
-		if ((cursor + total_size > user_buffer_size) || (total_size < DEFAULT_UCODE_TOTALSIZE)) {
-			printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-			error = -EINVAL;
-			goto out;
+		ext_header = mc + MC_HEADER_SIZE + data_size;
+		if (ext_table_size != exttable_size(ext_header)) {
+			printk(KERN_ERR "microcode: error! "
+				"Bad exttable size in microcode data file\n");
+			return -EFAULT;
 		}
 		}
+		ext_sigcount = ext_header->count;
+	}
 
 
-		data_size = get_datasize(&mc_header);
-		if ((data_size + MC_HEADER_SIZE > total_size) || (data_size < DEFAULT_UCODE_DATASIZE)) {
-			printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-			error = -EINVAL;
-			goto out;
+	/* check extended table checksum */
+	if (ext_table_size) {
+		int ext_table_sum = 0;
+		int *ext_tablep = (int *)ext_header;
+
+		i = ext_table_size / DWSIZE;
+		while (i--)
+			ext_table_sum += ext_tablep[i];
+		if (ext_table_sum) {
+			printk(KERN_WARNING "microcode: aborting, "
+				"bad extended signature table checksum\n");
+			return -EINVAL;
 		}
 		}
+	}
 
 
-		if (mc_header.ldrver != 1 || mc_header.hdrver != 1) {
-			printk(KERN_ERR "microcode: error! Unknown microcode update format\n");
-			error = -EINVAL;
-			goto out;
+	/* calculate the checksum */
+	orig_sum = 0;
+	i = (MC_HEADER_SIZE + data_size) / DWSIZE;
+	while (i--)
+		orig_sum += ((int *)mc)[i];
+	if (orig_sum) {
+		printk(KERN_ERR "microcode: aborting, bad checksum\n");
+		return -EINVAL;
+	}
+	if (!ext_table_size)
+		return 0;
+	/* check extended signature checksum */
+	for (i = 0; i < ext_sigcount; i++) {
+		ext_sig = (struct extended_signature *)((void *)ext_header
+			+ EXT_HEADER_SIZE + EXT_SIGNATURE_SIZE * i);
+		sum = orig_sum
+			- (mc_header->sig + mc_header->pf + mc_header->cksum)
+			+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+		if (sum) {
+			printk(KERN_ERR "microcode: aborting, bad checksum\n");
+			return -EINVAL;
 		}
 		}
+	}
+	return 0;
+}
 
 
-		for_each_online_cpu(cpu_num) {
-			struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
-			if (sigmatch(mc_header.sig, uci->sig, mc_header.pf, uci->orig_pf))
-				mark_microcode_update(cpu_num, &mc_header, mc_header.sig, mc_header.pf, mc_header.cksum);
-		}
+/*
+ * return 0 - no update found
+ * return 1 - found update
+ * return < 0 - error
+ */
+static int get_maching_microcode(void *mc, int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	microcode_header_t *mc_header = mc;
+	struct extended_sigtable *ext_header;
+	unsigned long total_size = get_totalsize(mc_header);
+	int ext_sigcount, i;
+	struct extended_signature *ext_sig;
+	void *new_mc;
+
+	if (microcode_update_match(cpu, mc_header,
+			mc_header->sig, mc_header->pf))
+		goto find;
+
+	if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
+		return 0;
+
+	ext_header = (struct extended_sigtable *)(mc +
+			get_datasize(mc_header) + MC_HEADER_SIZE);
+	ext_sigcount = ext_header->count;
+	ext_sig = (struct extended_signature *)((void *)ext_header
+			+ EXT_HEADER_SIZE);
+	for (i = 0; i < ext_sigcount; i++) {
+		if (microcode_update_match(cpu, mc_header,
+				ext_sig->sig, ext_sig->pf))
+			goto find;
+		ext_sig++;
+	}
+	return 0;
+find:
+	pr_debug("microcode: CPU %d found a matching microcode update with"
+		" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
+	new_mc = vmalloc(total_size);
+	if (!new_mc) {
+		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
+		return -ENOMEM;
+	}
 
 
-		ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-		if (ext_table_size) {
-			struct extended_sigtable ext_header;
-			struct extended_signature ext_sig;
-			int ext_sigcount;
+	/* free previous update file */
+	vfree(uci->mc);
 
 
-			if ((ext_table_size < EXT_HEADER_SIZE) 
-					|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-				printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-				error = -EINVAL;
-				goto out;
-			}
-			if (copy_from_user(&ext_header, user_buffer + cursor 
-					+ MC_HEADER_SIZE + data_size, EXT_HEADER_SIZE)) {
-				printk(KERN_ERR "microcode: error! Can not read user data\n");
-				error = -EFAULT;
-				goto out;
-			}
-			if (ext_table_size != exttable_size(&ext_header)) {
-				printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
-				error = -EFAULT;
-				goto out;
-			}
-
-			ext_sigcount = ext_header.count;
-			
-			for (i = 0; i < ext_sigcount; i++) {
-				if (copy_from_user(&ext_sig, user_buffer + cursor + MC_HEADER_SIZE + data_size + EXT_HEADER_SIZE 
-						+ EXT_SIGNATURE_SIZE * i, EXT_SIGNATURE_SIZE)) {
-					printk(KERN_ERR "microcode: error! Can not read user data\n");
-					error = -EFAULT;
-					goto out;
-				}
-				for_each_online_cpu(cpu_num) {
-					struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-
-					if (sigmatch(ext_sig.sig, uci->sig, ext_sig.pf, uci->orig_pf)) {
-						mark_microcode_update(cpu_num, &mc_header, ext_sig.sig, ext_sig.pf, ext_sig.cksum);
-					}
-				}
-			}
-		}
-		/* now check if any cpu has matched */
-		allocated_flag = 0;
-		sum = 0;
-		for_each_online_cpu(cpu_num) {
-			if (ucode_cpu_info[cpu_num].err == MC_MARKED) { 
-				struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-				if (!allocated_flag) {
-					allocated_flag = 1;
-					newmc = vmalloc(total_size);
-					if (!newmc) {
-						printk(KERN_ERR "microcode: error! Can not allocate memory\n");
-						error = -ENOMEM;
-						goto out;
-					}
-					if (copy_from_user(newmc + MC_HEADER_SIZE, 
-								user_buffer + cursor + MC_HEADER_SIZE, 
-								total_size - MC_HEADER_SIZE)) {
-						printk(KERN_ERR "microcode: error! Can not read user data\n");
-						vfree(newmc);
-						error = -EFAULT;
-						goto out;
-					}
-					memcpy(newmc, &mc_header, MC_HEADER_SIZE);
-					/* check extended table checksum */
-					if (ext_table_size) {
-						int ext_table_sum = 0;
-						int * ext_tablep = (((void *) newmc) + MC_HEADER_SIZE + data_size);
-						i = ext_table_size / DWSIZE;
-						while (i--) ext_table_sum += ext_tablep[i];
-						if (ext_table_sum) {
-							printk(KERN_WARNING "microcode: aborting, bad extended signature table checksum\n");
-							vfree(newmc);
-							error = -EINVAL;
-							goto out;
-						}
-					}
-
-					/* calculate the checksum */
-					i = (MC_HEADER_SIZE + data_size) / DWSIZE;
-					while (i--) sum += ((int *)newmc)[i];
-					sum -= (mc_header.sig + mc_header.pf + mc_header.cksum);
-				}
-				ucode_cpu_info[cpu_num].mc = newmc;
-				ucode_cpu_info[cpu_num].err = MC_ALLOCATED; /* mc updated */
-				if (sum + uci->sig + uci->pf + uci->cksum != 0) {
-					printk(KERN_ERR "microcode: CPU%d aborting, bad checksum\n", cpu_num);
-					error = -EINVAL;
-					goto out;
-				}
-			}
-		}
-		cursor += total_size; /* goto the next update patch */
-	} /* end of while */
-out:
-	return error;
+	memcpy(new_mc, mc, total_size);
+	uci->mc = new_mc;
+	return 1;
 }
 }
 
 
-static void do_update_one (void * unused)
+static void apply_microcode(int cpu)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	unsigned int val[2];
 	unsigned int val[2];
-	int cpu_num = smp_processor_id();
+	int cpu_num = raw_smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
 
 
-	if (uci->mc == NULL) {
-		if (verbose) {
-			if (uci->err == MC_SUCCESS)
-				printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n",
-					cpu_num, uci->rev);
-			else
-				printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num);
-		}
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu_num != cpu);
+
+	if (uci->mc == NULL)
 		return;
 		return;
-	}
 
 
 	/* serialize access to the physical write to MSR 0x79 */
 	/* serialize access to the physical write to MSR 0x79 */
 	spin_lock_irqsave(&microcode_update_lock, flags);          
 	spin_lock_irqsave(&microcode_update_lock, flags);          
@@ -408,68 +335,107 @@ static void do_update_one (void * unused)
 	/* get the current revision from MSR 0x8B */
 	/* get the current revision from MSR 0x8B */
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 	rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
 
-	/* notify the caller of success on this cpu */
-	uci->err = MC_SUCCESS;
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
 	spin_unlock_irqrestore(&microcode_update_lock, flags);
-	printk(KERN_INFO "microcode: CPU%d updated from revision "
+	if (val[1] != uci->mc->hdr.rev) {
+		printk(KERN_ERR "microcode: CPU%d updated from revision "
+			"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
+		return;
+	}
+	pr_debug("microcode: CPU%d updated from revision "
 	       "0x%x to 0x%x, date = %08x \n", 
 	       "0x%x to 0x%x, date = %08x \n", 
 	       cpu_num, uci->rev, val[1], uci->mc->hdr.date);
 	       cpu_num, uci->rev, val[1], uci->mc->hdr.date);
-	return;
+	uci->rev = val[1];
 }
 }
 
 
-static int do_microcode_update (void)
-{
-	int i, error;
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+static void __user *user_buffer;	/* user area microcode data buffer */
+static unsigned int user_buffer_size;	/* it's size */
 
 
-	if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) {
-		printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
-		error = -EIO;
-		goto out;
+static long get_next_ucode(void **mc, long offset)
+{
+	microcode_header_t mc_header;
+	unsigned long total_size;
+
+	/* No more data */
+	if (offset >= user_buffer_size)
+		return 0;
+	if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
+		printk(KERN_ERR "microcode: error! Can not read user data\n");
+		return -EFAULT;
 	}
 	}
-
-	if ((error = find_matching_ucodes())) {
-		printk(KERN_ERR "microcode: Error in the microcode data\n");
-		goto out_free;
+	total_size = get_totalsize(&mc_header);
+	if (offset + total_size > user_buffer_size) {
+		printk(KERN_ERR "microcode: error! Bad total size in microcode "
+				"data file\n");
+		return -EINVAL;
 	}
 	}
-
-	if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) {
-		printk(KERN_ERR "microcode: Error! Could not run on all processors\n");
-		error = -EIO;
+	*mc = vmalloc(total_size);
+	if (!*mc)
+		return -ENOMEM;
+	if (copy_from_user(*mc, user_buffer + offset, total_size)) {
+		printk(KERN_ERR "microcode: error! Can not read user data\n");
+		vfree(*mc);
+		return -EFAULT;
 	}
 	}
+	return offset + total_size;
+}
+
+static int do_microcode_update (void)
+{
+	long cursor = 0;
+	int error = 0;
+	void *new_mc;
+	int cpu;
+	cpumask_t old;
+
+	old = current->cpus_allowed;
 
 
-out_free:
-	for_each_online_cpu(i) {
-		if (ucode_cpu_info[i].mc) {
-			int j;
-			void *tmp = ucode_cpu_info[i].mc;
-			vfree(tmp);
-			for_each_online_cpu(j) {
-				if (ucode_cpu_info[j].mc == tmp)
-					ucode_cpu_info[j].mc = NULL;
-			}
+	while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
+		error = microcode_sanity_check(new_mc);
+		if (error)
+			goto out;
+		/*
+		 * It's possible the data file has multiple matching ucode,
+		 * lets keep searching till the latest version
+		 */
+		for_each_online_cpu(cpu) {
+			struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+			if (!uci->valid)
+				continue;
+			set_cpus_allowed(current, cpumask_of_cpu(cpu));
+			error = get_maching_microcode(new_mc, cpu);
+			if (error < 0)
+				goto out;
+			if (error == 1)
+				apply_microcode(cpu);
 		}
 		}
-		if (ucode_cpu_info[i].err == MC_IGNORED && verbose)
-			printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision"
-			       " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev);
+		vfree(new_mc);
 	}
 	}
 out:
 out:
+	if (cursor > 0)
+		vfree(new_mc);
+	if (cursor < 0)
+		error = cursor;
+	set_cpus_allowed(current, old);
 	return error;
 	return error;
 }
 }
 
 
+static int microcode_open (struct inode *unused1, struct file *unused2)
+{
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
 static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
 {
 {
 	ssize_t ret;
 	ssize_t ret;
 
 
-	if (len < DEFAULT_UCODE_TOTALSIZE) {
-		printk(KERN_ERR "microcode: not enough data\n"); 
-		return -EINVAL;
-	}
-
 	if ((len >> PAGE_SHIFT) > num_physpages) {
 	if ((len >> PAGE_SHIFT) > num_physpages) {
 		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
 		printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
+	lock_cpu_hotplug();
 	mutex_lock(&microcode_mutex);
 	mutex_lock(&microcode_mutex);
 
 
 	user_buffer = (void __user *) buf;
 	user_buffer = (void __user *) buf;
@@ -480,6 +446,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_
 		ret = (ssize_t)len;
 		ret = (ssize_t)len;
 
 
 	mutex_unlock(&microcode_mutex);
 	mutex_unlock(&microcode_mutex);
+	unlock_cpu_hotplug();
 
 
 	return ret;
 	return ret;
 }
 }
@@ -496,7 +463,7 @@ static struct miscdevice microcode_dev = {
 	.fops		= &microcode_fops,
 	.fops		= &microcode_fops,
 };
 };
 
 
-static int __init microcode_init (void)
+static int __init microcode_dev_init (void)
 {
 {
 	int error;
 	int error;
 
 
@@ -508,6 +475,280 @@ static int __init microcode_init (void)
 		return error;
 		return error;
 	}
 	}
 
 
+	return 0;
+}
+
+static void __exit microcode_dev_exit (void)
+{
+	misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while(0)
+#endif
+
+static long get_next_ucode_from_buffer(void **mc, void *buf,
+	unsigned long size, long offset)
+{
+	microcode_header_t *mc_header;
+	unsigned long total_size;
+
+	/* No more data */
+	if (offset >= size)
+		return 0;
+	mc_header = (microcode_header_t *)(buf + offset);
+	total_size = get_totalsize(mc_header);
+
+	if (offset + total_size > size) {
+		printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
+		return -EINVAL;
+	}
+
+	*mc = vmalloc(total_size);
+	if (!*mc) {
+		printk(KERN_ERR "microcode: error! Can not allocate memory\n");
+		return -ENOMEM;
+	}
+	memcpy(*mc, buf + offset, total_size);
+	return offset + total_size;
+}
+
+/* fake device for request_firmware */
+static struct platform_device *microcode_pdev;
+
+static int cpu_request_microcode(int cpu)
+{
+	char name[30];
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	const struct firmware *firmware;
+	void *buf;
+	unsigned long size;
+	long offset = 0;
+	int error;
+	void *mc;
+
+	/* We should bind the task to the CPU */
+	BUG_ON(cpu != raw_smp_processor_id());
+	sprintf(name,"intel-ucode/%02x-%02x-%02x",
+		c->x86, c->x86_model, c->x86_mask);
+	error = request_firmware(&firmware, name, &microcode_pdev->dev);
+	if (error) {
+		pr_debug("ucode data file %s load failed\n", name);
+		return error;
+	}
+	buf = (void *)firmware->data;
+	size = firmware->size;
+	while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
+			> 0) {
+		error = microcode_sanity_check(mc);
+		if (error)
+			break;
+		error = get_maching_microcode(mc, cpu);
+		if (error < 0)
+			break;
+		/*
+		 * It's possible the data file has multiple matching ucode,
+		 * lets keep searching till the latest version
+		 */
+		if (error == 1) {
+			apply_microcode(cpu);
+			error = 0;
+		}
+		vfree(mc);
+	}
+	if (offset > 0)
+		vfree(mc);
+	if (offset < 0)
+		error = offset;
+	release_firmware(firmware);
+
+	return error;
+}
+
+static void microcode_init_cpu(int cpu)
+{
+	cpumask_t old;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	old = current->cpus_allowed;
+
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	mutex_lock(&microcode_mutex);
+	collect_cpu_info(cpu);
+	if (uci->valid)
+		cpu_request_microcode(cpu);
+	mutex_unlock(&microcode_mutex);
+	set_cpus_allowed(current, old);
+}
+
+static void microcode_fini_cpu(int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	mutex_lock(&microcode_mutex);
+	uci->valid = 0;
+	vfree(uci->mc);
+	uci->mc = NULL;
+	mutex_unlock(&microcode_mutex);
+}
+
+static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+	char *end;
+	unsigned long val = simple_strtoul(buf, &end, 0);
+	int err = 0;
+	int cpu = dev->id;
+
+	if (end == buf)
+		return -EINVAL;
+	if (val == 1) {
+		cpumask_t old;
+
+		old = current->cpus_allowed;
+
+		lock_cpu_hotplug();
+		set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+		mutex_lock(&microcode_mutex);
+		if (uci->valid)
+			err = cpu_request_microcode(cpu);
+		mutex_unlock(&microcode_mutex);
+		unlock_cpu_hotplug();
+		set_cpus_allowed(current, old);
+	}
+	if (err)
+		return err;
+	return sz;
+}
+
+static ssize_t version_show(struct sys_device *dev, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->rev);
+}
+
+static ssize_t pf_show(struct sys_device *dev, char *buf)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
+
+	return sprintf(buf, "0x%x\n", uci->pf);
+}
+
+static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
+static SYSDEV_ATTR(version, 0400, version_show, NULL);
+static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
+
+static struct attribute *mc_default_attrs[] = {
+	&attr_reload.attr,
+	&attr_version.attr,
+	&attr_processor_flags.attr,
+	NULL
+};
+
+static struct attribute_group mc_attr_group = {
+	.attrs = mc_default_attrs,
+	.name = "microcode",
+};
+
+static int mc_sysdev_add(struct sys_device *sys_dev)
+{
+	int cpu = sys_dev->id;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+	if (!cpu_online(cpu))
+		return 0;
+	pr_debug("Microcode:CPU %d added\n", cpu);
+	memset(uci, 0, sizeof(*uci));
+	sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
+
+	microcode_init_cpu(cpu);
+	return 0;
+}
+
+static int mc_sysdev_remove(struct sys_device *sys_dev)
+{
+	int cpu = sys_dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+	pr_debug("Microcode:CPU %d removed\n", cpu);
+	microcode_fini_cpu(cpu);
+	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+	return 0;
+}
+
+static int mc_sysdev_resume(struct sys_device *dev)
+{
+	int cpu = dev->id;
+
+	if (!cpu_online(cpu))
+		return 0;
+	pr_debug("Microcode:CPU %d resumed\n", cpu);
+	/* only CPU 0 will apply ucode here */
+	apply_microcode(0);
+	return 0;
+}
+
+static struct sysdev_driver mc_sysdev_driver = {
+	.add = mc_sysdev_add,
+	.remove = mc_sysdev_remove,
+	.resume = mc_sysdev_resume,
+};
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit int
+mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+	struct sys_device *sys_dev;
+
+	sys_dev = get_cpu_sysdev(cpu);
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_DOWN_FAILED:
+		mc_sysdev_add(sys_dev);
+		break;
+	case CPU_DOWN_PREPARE:
+		mc_sysdev_remove(sys_dev);
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mc_cpu_notifier = {
+	.notifier_call = mc_cpu_callback,
+};
+#endif
+
+static int __init microcode_init (void)
+{
+	int error;
+
+	error = microcode_dev_init();
+	if (error)
+		return error;
+	microcode_pdev = platform_device_register_simple("microcode", -1,
+							 NULL, 0);
+	if (IS_ERR(microcode_pdev)) {
+		microcode_dev_exit();
+		return PTR_ERR(microcode_pdev);
+	}
+
+	lock_cpu_hotplug();
+	error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+	unlock_cpu_hotplug();
+	if (error) {
+		microcode_dev_exit();
+		platform_device_unregister(microcode_pdev);
+		return error;
+	}
+
+	register_hotcpu_notifier(&mc_cpu_notifier);
+
 	printk(KERN_INFO 
 	printk(KERN_INFO 
 		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
 		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
 	return 0;
 	return 0;
@@ -515,9 +756,16 @@ static int __init microcode_init (void)
 
 
 static void __exit microcode_exit (void)
 static void __exit microcode_exit (void)
 {
 {
-	misc_deregister(&microcode_dev);
+	microcode_dev_exit();
+
+	unregister_hotcpu_notifier(&mc_cpu_notifier);
+
+	lock_cpu_hotplug();
+	sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+	unlock_cpu_hotplug();
+
+	platform_device_unregister(microcode_pdev);
 }
 }
 
 
 module_init(microcode_init)
 module_init(microcode_init)
 module_exit(microcode_exit)
 module_exit(microcode_exit)
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);

+ 21 - 49
arch/i386/kernel/mpparse.c

@@ -30,6 +30,7 @@
 #include <asm/io_apic.h>
 #include <asm/io_apic.h>
 
 
 #include <mach_apic.h>
 #include <mach_apic.h>
+#include <mach_apicdef.h>
 #include <mach_mpparse.h>
 #include <mach_mpparse.h>
 #include <bios_ebda.h>
 #include <bios_ebda.h>
 
 
@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
 /* Processor that is doing the boot up */
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
 /* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __cpuinitdata num_processors;
 
 
 /* Bitmask of physically existing CPUs */
 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
 physid_mask_t phys_cpu_present_map;
@@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
 
 
 	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 	mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 
 
+#if MAX_MP_BUSSES < 256
 	if (m->mpc_busid >= MAX_MP_BUSSES) {
 	if (m->mpc_busid >= MAX_MP_BUSSES) {
 		printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
 		printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
 			" is too large, max. supported is %d\n",
 			" is too large, max. supported is %d\n",
 			m->mpc_busid, str, MAX_MP_BUSSES - 1);
 			m->mpc_busid, str, MAX_MP_BUSSES - 1);
 		return;
 		return;
 	}
 	}
+#endif
 
 
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
@@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
 			m->mpc_irqtype, m->mpc_irqflag & 3,
 			m->mpc_irqtype, m->mpc_irqflag & 3,
 			(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
 			(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
 			m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
 			m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-	/*
-	 * Well it seems all SMP boards in existence
-	 * use ExtINT/LVT1 == LINT0 and
-	 * NMI/LVT2 == LINT1 - the following check
-	 * will show us if this assumptions is false.
-	 * Until then we do not have to add baggage.
-	 */
-	if ((m->mpc_irqtype == mp_ExtINT) &&
-		(m->mpc_destapiclint != 0))
-			BUG();
-	if ((m->mpc_irqtype == mp_NMI) &&
-		(m->mpc_destapiclint != 1))
-			BUG();
 }
 }
 
 
 #ifdef CONFIG_X86_NUMAQ
 #ifdef CONFIG_X86_NUMAQ
@@ -822,8 +812,7 @@ int es7000_plat;
 
 
 #ifdef CONFIG_ACPI
 #ifdef CONFIG_ACPI
 
 
-void __init mp_register_lapic_address (
-	u64			address)
+void __init mp_register_lapic_address(u64 address)
 {
 {
 	mp_lapic_addr = (unsigned long) address;
 	mp_lapic_addr = (unsigned long) address;
 
 
@@ -835,13 +824,10 @@ void __init mp_register_lapic_address (
 	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
 	Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
 }
 }
 
 
-
-void __devinit mp_register_lapic (
-	u8			id, 
-	u8			enabled)
+void __devinit mp_register_lapic (u8 id, u8 enabled)
 {
 {
 	struct mpc_config_processor processor;
 	struct mpc_config_processor processor;
-	int			boot_cpu = 0;
+	int boot_cpu = 0;
 	
 	
 	if (MAX_APICS - id <= 0) {
 	if (MAX_APICS - id <= 0) {
 		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
 		printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
@@ -878,11 +864,9 @@ static struct mp_ioapic_routing {
 	u32			pin_programmed[4];
 	u32			pin_programmed[4];
 } mp_ioapic_routing[MAX_IO_APICS];
 } mp_ioapic_routing[MAX_IO_APICS];
 
 
-
-static int mp_find_ioapic (
-	int			gsi)
+static int mp_find_ioapic (int gsi)
 {
 {
-	int			i = 0;
+	int i = 0;
 
 
 	/* Find the IOAPIC that manages this GSI. */
 	/* Find the IOAPIC that manages this GSI. */
 	for (i = 0; i < nr_ioapics; i++) {
 	for (i = 0; i < nr_ioapics; i++) {
@@ -895,15 +879,11 @@ static int mp_find_ioapic (
 
 
 	return -1;
 	return -1;
 }
 }
-	
 
 
-void __init mp_register_ioapic (
-	u8			id, 
-	u32			address,
-	u32			gsi_base)
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
 {
 {
-	int			idx = 0;
-	int			tmpid;
+	int idx = 0;
+	int tmpid;
 
 
 	if (nr_ioapics >= MAX_IO_APICS) {
 	if (nr_ioapics >= MAX_IO_APICS) {
 		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
 		printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -949,16 +929,10 @@ void __init mp_register_ioapic (
 		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
 		mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
 		mp_ioapic_routing[idx].gsi_base,
 		mp_ioapic_routing[idx].gsi_base,
 		mp_ioapic_routing[idx].gsi_end);
 		mp_ioapic_routing[idx].gsi_end);
-
-	return;
 }
 }
 
 
-
-void __init mp_override_legacy_irq (
-	u8			bus_irq,
-	u8			polarity, 
-	u8			trigger, 
-	u32			gsi)
+void __init
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 {
 {
 	struct mpc_config_intsrc intsrc;
 	struct mpc_config_intsrc intsrc;
 	int			ioapic = -1;
 	int			ioapic = -1;
@@ -996,15 +970,13 @@ void __init mp_override_legacy_irq (
 	mp_irqs[mp_irq_entries] = intsrc;
 	mp_irqs[mp_irq_entries] = intsrc;
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 	if (++mp_irq_entries == MAX_IRQ_SOURCES)
 		panic("Max # of irq sources exceeded!\n");
 		panic("Max # of irq sources exceeded!\n");
-
-	return;
 }
 }
 
 
 void __init mp_config_acpi_legacy_irqs (void)
 void __init mp_config_acpi_legacy_irqs (void)
 {
 {
 	struct mpc_config_intsrc intsrc;
 	struct mpc_config_intsrc intsrc;
-	int			i = 0;
-	int			ioapic = -1;
+	int i = 0;
+	int ioapic = -1;
 
 
 	/* 
 	/* 
 	 * Fabricate the legacy ISA bus (bus #31).
 	 * Fabricate the legacy ISA bus (bus #31).
@@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void)
 
 
 #define MAX_GSI_NUM	4096
 #define MAX_GSI_NUM	4096
 
 
-int mp_register_gsi (u32 gsi, int triggering, int polarity)
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
 {
 {
-	int			ioapic = -1;
-	int			ioapic_pin = 0;
-	int			idx, bit = 0;
-	static int		pci_irq = 16;
+	int ioapic = -1;
+	int ioapic_pin = 0;
+	int idx, bit = 0;
+	static int pci_irq = 16;
 	/*
 	/*
 	 * Mapping between Global System Interrups, which
 	 * Mapping between Global System Interrups, which
 	 * represent all possible interrupts, and IRQs
 	 * represent all possible interrupts, and IRQs

+ 651 - 289
arch/i386/kernel/nmi.c

@@ -21,83 +21,174 @@
 #include <linux/sysdev.h>
 #include <linux/sysdev.h>
 #include <linux/sysctl.h>
 #include <linux/sysctl.h>
 #include <linux/percpu.h>
 #include <linux/percpu.h>
+#include <linux/dmi.h>
+#include <linux/kprobes.h>
 
 
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/nmi.h>
 #include <asm/nmi.h>
+#include <asm/kdebug.h>
 #include <asm/intel_arch_perfmon.h>
 #include <asm/intel_arch_perfmon.h>
 
 
 #include "mach_traps.h"
 #include "mach_traps.h"
 
 
-unsigned int nmi_watchdog = NMI_NONE;
-extern int unknown_nmi_panic;
-static unsigned int nmi_hz = HZ;
-static unsigned int nmi_perfctr_msr;	/* the MSR to reset in NMI handler */
-static unsigned int nmi_p4_cccr_val;
-extern void show_registers(struct pt_regs *regs);
+/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
+ * evtsel_nmi_owner tracks the ownership of the event selection
+ * - different performance counters/ event selection may be reserved for
+ *   different subsystems this reservation system just tries to coordinate
+ *   things a little
+ */
+static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
+static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
 
 
-/*
- * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
- * - it may be reserved by some other driver, or not
- * - when not reserved by some other driver, it may be used for
- *   the NMI watchdog, or not
- *
- * This is maintained separately from nmi_active because the NMI
- * watchdog may also be driven from the I/O APIC timer.
+/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
+ * offset from MSR_P4_BSU_ESCR0.  It will be the max for all platforms (for now)
  */
  */
-static DEFINE_SPINLOCK(lapic_nmi_owner_lock);
-static unsigned int lapic_nmi_owner;
-#define LAPIC_NMI_WATCHDOG	(1<<0)
-#define LAPIC_NMI_RESERVED	(1<<1)
+#define NMI_MAX_COUNTER_BITS 66
 
 
 /* nmi_active:
 /* nmi_active:
- * +1: the lapic NMI watchdog is active, but can be disabled
- *  0: the lapic NMI watchdog has not been set up, and cannot
+ * >0: the lapic NMI watchdog is active, but can be disabled
+ * <0: the lapic NMI watchdog has not been set up, and cannot
  *     be enabled
  *     be enabled
- * -1: the lapic NMI watchdog is disabled, but can be enabled
+ *  0: the lapic NMI watchdog is disabled, but can be enabled
  */
  */
-int nmi_active;
+atomic_t nmi_active = ATOMIC_INIT(0);		/* oprofile uses this */
 
 
-#define K7_EVNTSEL_ENABLE	(1 << 22)
-#define K7_EVNTSEL_INT		(1 << 20)
-#define K7_EVNTSEL_OS		(1 << 17)
-#define K7_EVNTSEL_USR		(1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
-#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+unsigned int nmi_watchdog = NMI_DEFAULT;
+static unsigned int nmi_hz = HZ;
 
 
-#define P6_EVNTSEL0_ENABLE	(1 << 22)
-#define P6_EVNTSEL_INT		(1 << 20)
-#define P6_EVNTSEL_OS		(1 << 17)
-#define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
-#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+struct nmi_watchdog_ctlblk {
+	int enabled;
+	u64 check_bit;
+	unsigned int cccr_msr;
+	unsigned int perfctr_msr;  /* the MSR to reset in NMI handler */
+	unsigned int evntsel_msr;  /* the MSR to select the events to handle */
+};
+static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
 
 
-#define MSR_P4_MISC_ENABLE	0x1A0
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
-#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL	(1<<12)
-#define MSR_P4_PERFCTR0		0x300
-#define MSR_P4_CCCR0		0x360
-#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
-#define P4_ESCR_OS		(1<<3)
-#define P4_ESCR_USR		(1<<2)
-#define P4_CCCR_OVF_PMI0	(1<<26)
-#define P4_CCCR_OVF_PMI1	(1<<27)
-#define P4_CCCR_THRESHOLD(N)	((N)<<20)
-#define P4_CCCR_COMPLEMENT	(1<<19)
-#define P4_CCCR_COMPARE		(1<<18)
-#define P4_CCCR_REQUIRED	(3<<16)
-#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
-#define P4_CCCR_ENABLE		(1<<12)
-/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
-   CRU_ESCR0 (with any non-null event selector) through a complemented
-   max threshold. [IA32-Vol3, Section 14.9.9] */
-#define MSR_P4_IQ_COUNTER0	0x30C
-#define P4_NMI_CRU_ESCR0	(P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
-#define P4_NMI_IQ_CCCR0	\
-	(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT|	\
-	 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
+/* local prototypes */
+static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
 
 
-#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+extern void show_registers(struct pt_regs *regs);
+extern int unknown_nmi_panic;
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the performance counter register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_PERFCTR0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_PERFCTR0);
+		case 15:
+			return (msr - MSR_P4_BPU_PERFCTR0);
+		}
+	}
+	return 0;
+}
+
+/* converts an msr to an appropriate reservation bit */
+static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
+{
+	/* returns the bit offset of the event selection register */
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return (msr - MSR_K7_EVNTSEL0);
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
+		switch (boot_cpu_data.x86) {
+		case 6:
+			return (msr - MSR_P6_EVNTSEL0);
+		case 15:
+			return (msr - MSR_P4_BSU_ESCR0);
+		}
+	}
+	return 0;
+}
+
+/* checks for a bit availability (hack for oprofile) */
+int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
+{
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+/* checks the an msr for availability */
+int avail_to_resrv_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
+}
+
+int reserve_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
+		return 1;
+	return 0;
+}
+
+void release_perfctr_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_perfctr_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
+}
+
+int reserve_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
+		return 1;
+	return 0;
+}
+
+void release_evntsel_nmi(unsigned int msr)
+{
+	unsigned int counter;
+
+	counter = nmi_evntsel_msr_to_bit(msr);
+	BUG_ON(counter > NMI_MAX_COUNTER_BITS);
+
+	clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
+}
+
+static __cpuinit inline int nmi_known_cpu(void)
+{
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	case X86_VENDOR_INTEL:
+		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+			return 1;
+		else
+			return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+	}
+	return 0;
+}
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
 /* The performance counters used by NMI_LOCAL_APIC don't trigger when
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void)
 	unsigned int *prev_nmi_count;
 	unsigned int *prev_nmi_count;
 	int cpu;
 	int cpu;
 
 
-	if (nmi_watchdog == NMI_NONE)
+	/* Enable NMI watchdog for newer systems.
+           Actually it should be safe for most systems before 2004 too except
+	   for some IBM systems that corrupt registers when NMI happens
+	   during SMM. Unfortunately we don't have more exact information
+ 	   on these and use this coarse check. */
+	if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
+		nmi_watchdog = NMI_LOCAL_APIC;
+
+	if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
+		return 0;
+
+	if (!atomic_read(&nmi_active))
 		return 0;
 		return 0;
 
 
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
 	prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void)
 		if (!cpu_isset(cpu, cpu_callin_map))
 		if (!cpu_isset(cpu, cpu_callin_map))
 			continue;
 			continue;
 #endif
 #endif
+		if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
+			continue;
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
 		if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
-			endflag = 1;
 			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 			printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
 				cpu,
 				cpu,
 				prev_nmi_count[cpu],
 				prev_nmi_count[cpu],
 				nmi_count(cpu));
 				nmi_count(cpu));
-			nmi_active = 0;
-			lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG;
-			kfree(prev_nmi_count);
-			return -1;
+			per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
+			atomic_dec(&nmi_active);
 		}
 		}
 	}
 	}
+	if (!atomic_read(&nmi_active)) {
+		kfree(prev_nmi_count);
+		atomic_set(&nmi_active, -1);
+		return -1;
+	}
 	endflag = 1;
 	endflag = 1;
 	printk("OK.\n");
 	printk("OK.\n");
 
 
 	/* now that we know it works we can reduce NMI frequency to
 	/* now that we know it works we can reduce NMI frequency to
 	   something more reasonable; makes a difference in some configs */
 	   something more reasonable; makes a difference in some configs */
-	if (nmi_watchdog == NMI_LOCAL_APIC)
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
 		nmi_hz = 1;
 		nmi_hz = 1;
+		/*
+		 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+		 * are writable, with higher bits sign extending from bit 31.
+		 * So, we can only program the counter with 31 bit values and
+		 * 32nd bit should be 1, for 33.. to be 1.
+		 * Find the appropriate nmi_hz
+		 */
+	 	if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+			((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+			u64 count = (u64)cpu_khz * 1000;
+			do_div(count, 0x7fffffffUL);
+			nmi_hz = count + 1;
+		}
+	}
 
 
 	kfree(prev_nmi_count);
 	kfree(prev_nmi_count);
 	return 0;
 	return 0;
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str)
 
 
 	get_option(&str, &nmi);
 	get_option(&str, &nmi);
 
 
-	if (nmi >= NMI_INVALID)
+	if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
 		return 0;
 		return 0;
-	if (nmi == NMI_NONE)
-		nmi_watchdog = nmi;
 	/*
 	/*
 	 * If any other x86 CPU has a local APIC, then
 	 * If any other x86 CPU has a local APIC, then
 	 * please test the NMI stuff there and send me the
 	 * please test the NMI stuff there and send me the
 	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
 	 * missing bits. Right now Intel P6/P4 and AMD K7 only.
 	 */
 	 */
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-			(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	if ((nmi == NMI_LOCAL_APIC) &&
-			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
-	  		(boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
-		nmi_watchdog = nmi;
-	/*
-	 * We can enable the IO-APIC watchdog
-	 * unconditionally.
-	 */
-	if (nmi == NMI_IO_APIC) {
-		nmi_active = 1;
-		nmi_watchdog = nmi;
-	}
+	if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
+		return 0;  /* no lapic support */
+	nmi_watchdog = nmi;
 	return 1;
 	return 1;
 }
 }
 
 
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 __setup("nmi_watchdog=", setup_nmi_watchdog);
 
 
-static void disable_intel_arch_watchdog(void);
-
 static void disable_lapic_nmi_watchdog(void)
 static void disable_lapic_nmi_watchdog(void)
 {
 {
-	if (nmi_active <= 0)
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
 		return;
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		wrmsr(MSR_K7_EVNTSEL0, 0, 0);
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			disable_intel_arch_watchdog();
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
-				break;
 
 
-			wrmsr(MSR_P6_EVNTSEL0, 0, 0);
-			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				break;
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
 
 
-			wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
-			wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
-			break;
-		}
-		break;
-	}
-	nmi_active = -1;
-	/* tell do_nmi() and others that we're not active any more */
-	nmi_watchdog = 0;
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 }
 
 
 static void enable_lapic_nmi_watchdog(void)
 static void enable_lapic_nmi_watchdog(void)
 {
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_LOCAL_APIC;
-		setup_apic_nmi_watchdog();
-	}
-}
+	BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
 
 
-int reserve_lapic_nmi(void)
-{
-	unsigned int old_owner;
-
-	spin_lock(&lapic_nmi_owner_lock);
-	old_owner = lapic_nmi_owner;
-	lapic_nmi_owner |= LAPIC_NMI_RESERVED;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (old_owner & LAPIC_NMI_RESERVED)
-		return -EBUSY;
-	if (old_owner & LAPIC_NMI_WATCHDOG)
-		disable_lapic_nmi_watchdog();
-	return 0;
-}
+	/* are we already enabled */
+	if (atomic_read(&nmi_active) != 0)
+		return;
 
 
-void release_lapic_nmi(void)
-{
-	unsigned int new_owner;
+	/* are we lapic aware */
+	if (nmi_known_cpu() <= 0)
+		return;
 
 
-	spin_lock(&lapic_nmi_owner_lock);
-	new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
-	lapic_nmi_owner = new_owner;
-	spin_unlock(&lapic_nmi_owner_lock);
-	if (new_owner & LAPIC_NMI_WATCHDOG)
-		enable_lapic_nmi_watchdog();
+	on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+	touch_nmi_watchdog();
 }
 }
 
 
 void disable_timer_nmi_watchdog(void)
 void disable_timer_nmi_watchdog(void)
 {
 {
-	if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0))
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) <= 0)
 		return;
 		return;
 
 
-	unset_nmi_callback();
-	nmi_active = -1;
-	nmi_watchdog = NMI_NONE;
+	disable_irq(0);
+	on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
+
+	BUG_ON(atomic_read(&nmi_active) != 0);
 }
 }
 
 
 void enable_timer_nmi_watchdog(void)
 void enable_timer_nmi_watchdog(void)
 {
 {
-	if (nmi_active < 0) {
-		nmi_watchdog = NMI_IO_APIC;
+	BUG_ON(nmi_watchdog != NMI_IO_APIC);
+
+	if (atomic_read(&nmi_active) == 0) {
 		touch_nmi_watchdog();
 		touch_nmi_watchdog();
-		nmi_active = 1;
+		on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
+		enable_irq(0);
 	}
 	}
 }
 }
 
 
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
 
 
 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
 {
 {
-	nmi_pm_active = nmi_active;
-	disable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	nmi_pm_active = atomic_read(&nmi_active);
+	stop_apic_nmi_watchdog(NULL);
+	BUG_ON(atomic_read(&nmi_active) != 0);
 	return 0;
 	return 0;
 }
 }
 
 
 static int lapic_nmi_resume(struct sys_device *dev)
 static int lapic_nmi_resume(struct sys_device *dev)
 {
 {
-	if (nmi_pm_active > 0)
-		enable_lapic_nmi_watchdog();
+	/* only CPU0 goes here, other CPUs should be offline */
+	if (nmi_pm_active > 0) {
+		setup_apic_nmi_watchdog(NULL);
+		touch_nmi_watchdog();
+	}
 	return 0;
 	return 0;
 }
 }
 
 
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void)
 {
 {
 	int error;
 	int error;
 
 
-	if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC)
+	/* should really be a BUG_ON but b/c this is an
+	 * init call, it just doesn't work.  -dcz
+	 */
+	if (nmi_watchdog != NMI_LOCAL_APIC)
+		return 0;
+
+	if ( atomic_read(&nmi_active) < 0 )
 		return 0;
 		return 0;
 
 
 	error = sysdev_class_register(&nmi_sysclass);
 	error = sysdev_class_register(&nmi_sysclass);
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs);
  * Original code written by Keith Owens.
  * Original code written by Keith Owens.
  */
  */
 
 
-static void clear_msr_range(unsigned int base, unsigned int n)
-{
-	unsigned int i;
-
-	for(i = 0; i < n; ++i)
-		wrmsr(base+i, 0, 0);
-}
-
-static void write_watchdog_counter(const char *descr)
+static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
 {
 {
 	u64 count = (u64)cpu_khz * 1000;
 	u64 count = (u64)cpu_khz * 1000;
 
 
 	do_div(count, nmi_hz);
 	do_div(count, nmi_hz);
 	if(descr)
 	if(descr)
 		Dprintk("setting %s to -0x%08Lx\n", descr, count);
 		Dprintk("setting %s to -0x%08Lx\n", descr, count);
-	wrmsrl(nmi_perfctr_msr, 0 - count);
+	wrmsrl(perfctr_msr, 0 - count);
 }
 }
 
 
-static void setup_k7_watchdog(void)
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define K7_EVNTSEL_ENABLE	(1 << 22)
+#define K7_EVNTSEL_INT		(1 << 20)
+#define K7_EVNTSEL_OS		(1 << 17)
+#define K7_EVNTSEL_USR		(1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
+#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+static int setup_k7_watchdog(void)
 {
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_K7_PERFCTR0;
+	evntsel_msr = MSR_K7_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
 
-	nmi_perfctr_msr = MSR_K7_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
 
-	clear_msr_range(MSR_K7_EVNTSEL0, 4);
-	clear_msr_range(MSR_K7_PERFCTR0, 4);
+	wrmsrl(perfctr_msr, 0UL);
 
 
 	evntsel = K7_EVNTSEL_INT
 	evntsel = K7_EVNTSEL_INT
 		| K7_EVNTSEL_OS
 		| K7_EVNTSEL_OS
 		| K7_EVNTSEL_USR
 		| K7_EVNTSEL_USR
 		| K7_NMI_EVENT;
 		| K7_NMI_EVENT;
 
 
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("K7_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= K7_EVNTSEL_ENABLE;
 	evntsel |= K7_EVNTSEL_ENABLE;
-	wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<63;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_k7_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 }
 
 
-static void setup_p6_watchdog(void)
+#define P6_EVNTSEL0_ENABLE	(1 << 22)
+#define P6_EVNTSEL_INT		(1 << 20)
+#define P6_EVNTSEL_OS		(1 << 17)
+#define P6_EVNTSEL_USR		(1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+static int setup_p6_watchdog(void)
 {
 {
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
 	unsigned int evntsel;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	perfctr_msr = MSR_P6_PERFCTR0;
+	evntsel_msr = MSR_P6_EVNTSEL0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
 
-	nmi_perfctr_msr = MSR_P6_PERFCTR0;
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
 
 
-	clear_msr_range(MSR_P6_EVNTSEL0, 2);
-	clear_msr_range(MSR_P6_PERFCTR0, 2);
+	wrmsrl(perfctr_msr, 0UL);
 
 
 	evntsel = P6_EVNTSEL_INT
 	evntsel = P6_EVNTSEL_INT
 		| P6_EVNTSEL_OS
 		| P6_EVNTSEL_OS
 		| P6_EVNTSEL_USR
 		| P6_EVNTSEL_USR
 		| P6_NMI_EVENT;
 		| P6_NMI_EVENT;
 
 
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
-	write_watchdog_counter("P6_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= P6_EVNTSEL0_ENABLE;
 	evntsel |= P6_EVNTSEL0_ENABLE;
-	wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL<<39;
+	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
+}
+
+static void stop_p6_watchdog(void)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 }
 
 
+/* Note that these events don't tick when the CPU idles. This means
+   the frequency varies with CPU load. */
+
+#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
+#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
+#define P4_ESCR_OS		(1<<3)
+#define P4_ESCR_USR		(1<<2)
+#define P4_CCCR_OVF_PMI0	(1<<26)
+#define P4_CCCR_OVF_PMI1	(1<<27)
+#define P4_CCCR_THRESHOLD(N)	((N)<<20)
+#define P4_CCCR_COMPLEMENT	(1<<19)
+#define P4_CCCR_COMPARE		(1<<18)
+#define P4_CCCR_REQUIRED	(3<<16)
+#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
+#define P4_CCCR_ENABLE		(1<<12)
+#define P4_CCCR_OVF 		(1<<31)
+/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
+   CRU_ESCR0 (with any non-null event selector) through a complemented
+   max threshold. [IA32-Vol3, Section 14.9.9] */
+
 static int setup_p4_watchdog(void)
 static int setup_p4_watchdog(void)
 {
 {
+	unsigned int perfctr_msr, evntsel_msr, cccr_msr;
+	unsigned int evntsel, cccr_val;
 	unsigned int misc_enable, dummy;
 	unsigned int misc_enable, dummy;
+	unsigned int ht_num;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
 
-	rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
+	rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 	if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
 		return 0;
 		return 0;
 
 
-	nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
-	nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
-		nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
+	/* detect which hyperthread we are on */
+	if (smp_num_siblings == 2) {
+		unsigned int ebx, apicid;
+
+        	ebx = cpuid_ebx(1);
+	        apicid = (ebx >> 24) & 0xff;
+        	ht_num = apicid & 1;
+	} else
 #endif
 #endif
+		ht_num = 0;
 
 
-	if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
-		clear_msr_range(0x3F1, 2);
-	/* MSR 0x3F0 seems to have a default value of 0xFC00, but current
-	   docs doesn't fully define it, so leave it alone for now. */
-	if (boot_cpu_data.x86_model >= 0x3) {
-		/* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */
-		clear_msr_range(0x3A0, 26);
-		clear_msr_range(0x3BC, 3);
+	/* performance counters are shared resources
+	 * assign each hyperthread its own set
+	 * (re-use the ESCR0 register, seems safe
+	 * and keeps the cccr_val the same)
+	 */
+	if (!ht_num) {
+		/* logical cpu 0 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR0;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR0;
+		cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
 	} else {
 	} else {
-		clear_msr_range(0x3A0, 31);
+		/* logical cpu 1 */
+		perfctr_msr = MSR_P4_IQ_PERFCTR1;
+		evntsel_msr = MSR_P4_CRU_ESCR0;
+		cccr_msr = MSR_P4_IQ_CCCR1;
+		cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
 	}
 	}
-	clear_msr_range(0x3C0, 6);
-	clear_msr_range(0x3C8, 6);
-	clear_msr_range(0x3E0, 2);
-	clear_msr_range(MSR_P4_CCCR0, 18);
-	clear_msr_range(MSR_P4_PERFCTR0, 18);
-
-	wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
-	wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
-	write_watchdog_counter("P4_IQ_COUNTER0");
+
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
+
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	evntsel = P4_ESCR_EVENT_SELECT(0x3F)
+	 	| P4_ESCR_OS
+		| P4_ESCR_USR;
+
+	cccr_val |= P4_CCCR_THRESHOLD(15)
+		 | P4_CCCR_COMPLEMENT
+		 | P4_CCCR_COMPARE
+		 | P4_CCCR_REQUIRED;
+
+	wrmsr(evntsel_msr, evntsel, 0);
+	wrmsr(cccr_msr, cccr_val, 0);
+	write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-	wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
+	cccr_val |= P4_CCCR_ENABLE;
+	wrmsr(cccr_msr, cccr_val, 0);
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = cccr_msr;
+	wd->check_bit = 1ULL<<39;
 	return 1;
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 }
 
 
-static void disable_intel_arch_watchdog(void)
+static void stop_p4_watchdog(void)
 {
 {
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
 
-	/*
-	 * Check whether the Architectural PerfMon supports
-	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
-	 */
-	ebx = cpuid_ebx(10);
-	if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
+	wrmsr(wd->cccr_msr, 0, 0);
+	wrmsr(wd->evntsel_msr, 0, 0);
+
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
 }
 }
 
 
+#define ARCH_PERFMON_NMI_EVENT_SEL	ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK	ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
 static int setup_intel_arch_watchdog(void)
 static int setup_intel_arch_watchdog(void)
 {
 {
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	unsigned int perfctr_msr, evntsel_msr;
 	unsigned int evntsel;
 	unsigned int evntsel;
-	unsigned ebx;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
 
 
 	/*
 	/*
 	 * Check whether the Architectural PerfMon supports
 	 * Check whether the Architectural PerfMon supports
 	 * Unhalted Core Cycles Event or not.
 	 * Unhalted Core Cycles Event or not.
-	 * NOTE: Corresponding bit = 0 in ebp indicates event present.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
 	 */
 	 */
-	ebx = cpuid_ebx(10);
-	if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
-		return 0;
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		goto fail;
+
+	perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
 
 
-	nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+	if (!reserve_perfctr_nmi(perfctr_msr))
+		goto fail;
 
 
-	clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2);
-	clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2);
+	if (!reserve_evntsel_nmi(evntsel_msr))
+		goto fail1;
+
+	wrmsrl(perfctr_msr, 0UL);
 
 
 	evntsel = ARCH_PERFMON_EVENTSEL_INT
 	evntsel = ARCH_PERFMON_EVENTSEL_INT
 		| ARCH_PERFMON_EVENTSEL_OS
 		| ARCH_PERFMON_EVENTSEL_OS
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void)
 		| ARCH_PERFMON_NMI_EVENT_SEL
 		| ARCH_PERFMON_NMI_EVENT_SEL
 		| ARCH_PERFMON_NMI_EVENT_UMASK;
 		| ARCH_PERFMON_NMI_EVENT_UMASK;
 
 
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
-	write_watchdog_counter("INTEL_ARCH_PERFCTR0");
+	/* setup the timer */
+	wrmsr(evntsel_msr, evntsel, 0);
+	write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 	evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
-	wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0);
+	wrmsr(evntsel_msr, evntsel, 0);
+
+	wd->perfctr_msr = perfctr_msr;
+	wd->evntsel_msr = evntsel_msr;
+	wd->cccr_msr = 0;  //unused
+	wd->check_bit = 1ULL << (eax.split.bit_width - 1);
 	return 1;
 	return 1;
+fail1:
+	release_perfctr_nmi(perfctr_msr);
+fail:
+	return 0;
 }
 }
 
 
-void setup_apic_nmi_watchdog (void)
+static void stop_intel_arch_watchdog(void)
 {
 {
-	switch (boot_cpu_data.x86_vendor) {
-	case X86_VENDOR_AMD:
-		if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
-			return;
-		setup_k7_watchdog();
-		break;
-	case X86_VENDOR_INTEL:
-		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
-			if (!setup_intel_arch_watchdog())
+	unsigned int ebx;
+	union cpuid10_eax eax;
+	unsigned int unused;
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/*
+	 * Check whether the Architectural PerfMon supports
+	 * Unhalted Core Cycles Event or not.
+	 * NOTE: Corresponding bit = 0 in ebx indicates event present.
+	 */
+	cpuid(10, &(eax.full), &ebx, &unused, &unused);
+	if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+	    (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+		return;
+
+	wrmsr(wd->evntsel_msr, 0, 0);
+	release_evntsel_nmi(wd->evntsel_msr);
+	release_perfctr_nmi(wd->perfctr_msr);
+}
+
+void setup_apic_nmi_watchdog (void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+	    	return;
+
+	if (wd->enabled == 1)
+		return;
+
+	/* cheap hack to support suspend/resume */
+	/* if cpu0 is not active neither should the other cpus */
+	if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
+		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
 				return;
 				return;
-			break;
-		}
-		switch (boot_cpu_data.x86) {
-		case 6:
-			if (boot_cpu_data.x86_model > 0xd)
+			if (!setup_k7_watchdog())
 				return;
 				return;
-
-			setup_p6_watchdog();
 			break;
 			break;
-		case 15:
-			if (boot_cpu_data.x86_model > 0x4)
-				return;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				if (!setup_intel_arch_watchdog())
+					return;
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					return;
+
+				if (!setup_p6_watchdog())
+					return;
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					return;
 
 
-			if (!setup_p4_watchdog())
+				if (!setup_p4_watchdog())
+					return;
+				break;
+			default:
 				return;
 				return;
+			}
 			break;
 			break;
 		default:
 		default:
 			return;
 			return;
 		}
 		}
-		break;
-	default:
+	}
+	wd->enabled = 1;
+	atomic_inc(&nmi_active);
+}
+
+void stop_apic_nmi_watchdog(void *unused)
+{
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+	/* only support LOCAL and IO APICs for now */
+	if ((nmi_watchdog != NMI_LOCAL_APIC) &&
+	    (nmi_watchdog != NMI_IO_APIC))
+	    	return;
+
+	if (wd->enabled == 0)
 		return;
 		return;
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_AMD:
+			stop_k7_watchdog();
+			break;
+		case X86_VENDOR_INTEL:
+			if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+				stop_intel_arch_watchdog();
+				break;
+			}
+			switch (boot_cpu_data.x86) {
+			case 6:
+				if (boot_cpu_data.x86_model > 0xd)
+					break;
+				stop_p6_watchdog();
+				break;
+			case 15:
+				if (boot_cpu_data.x86_model > 0x4)
+					break;
+				stop_p4_watchdog();
+				break;
+			}
+			break;
+		default:
+			return;
+		}
 	}
 	}
-	lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
-	nmi_active = 1;
+	wd->enabled = 0;
+	atomic_dec(&nmi_active);
 }
 }
 
 
 /*
 /*
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
 
 
 extern void die_nmi(struct pt_regs *, const char *msg);
 extern void die_nmi(struct pt_regs *, const char *msg);
 
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
 {
 {
 
 
 	/*
 	/*
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 	 * smp_processor_id().
 	 * smp_processor_id().
 	 */
 	 */
 	unsigned int sum;
 	unsigned int sum;
+	int touched = 0;
 	int cpu = smp_processor_id();
 	int cpu = smp_processor_id();
+	struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+	u64 dummy;
+	int rc=0;
+
+	/* check for other users first */
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+			== NOTIFY_STOP) {
+		rc = 1;
+		touched = 1;
+	}
 
 
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 	sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
 
 
-	if (last_irq_sums[cpu] == sum) {
+	/* if the apic timer isn't firing, this cpu isn't doing much */
+	if (!touched && last_irq_sums[cpu] == sum) {
 		/*
 		/*
 		 * Ayiee, looks like this CPU is stuck ...
 		 * Ayiee, looks like this CPU is stuck ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
 		 * wait a few IRQs (5 seconds) before doing the oops ...
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs)
 		last_irq_sums[cpu] = sum;
 		last_irq_sums[cpu] = sum;
 		alert_counter[cpu] = 0;
 		alert_counter[cpu] = 0;
 	}
 	}
-	if (nmi_perfctr_msr) {
-		if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) {
-			/*
-			 * P4 quirks:
-			 * - An overflown perfctr will assert its interrupt
-			 *   until the OVF flag in its CCCR is cleared.
-			 * - LVTPC is masked on interrupt and must be
-			 *   unmasked by the LVTPC handler.
+	/* see if the nmi watchdog went off */
+	if (wd->enabled) {
+		if (nmi_watchdog == NMI_LOCAL_APIC) {
+			rdmsrl(wd->perfctr_msr, dummy);
+			if (dummy & wd->check_bit){
+				/* this wasn't a watchdog timer interrupt */
+				goto done;
+			}
+
+			/* only Intel P4 uses the cccr msr */
+	 		if (wd->cccr_msr != 0) {
+	 			/*
+	 			 * P4 quirks:
+	 			 * - An overflown perfctr will assert its interrupt
+	 			 *   until the OVF flag in its CCCR is cleared.
+	 			 * - LVTPC is masked on interrupt and must be
+	 			 *   unmasked by the LVTPC handler.
+	 			 */
+				rdmsrl(wd->cccr_msr, dummy);
+				dummy &= ~P4_CCCR_OVF;
+	 			wrmsrl(wd->cccr_msr, dummy);
+	 			apic_write(APIC_LVTPC, APIC_DM_NMI);
+	 		}
+			else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+				 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+				/* P6 based Pentium M need to re-unmask
+				 * the apic vector but it doesn't hurt
+				 * other P6 variant.
+				 * ArchPerfom/Core Duo also needs this */
+				apic_write(APIC_LVTPC, APIC_DM_NMI);
+			}
+			/* start the cycle over again */
+			write_watchdog_counter(wd->perfctr_msr, NULL);
+			rc = 1;
+		} else if (nmi_watchdog == NMI_IO_APIC) {
+			/* don't know how to accurately check for this.
+			 * just assume it was a watchdog timer interrupt
+			 * This matches the old behaviour.
 			 */
 			 */
-			wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0);
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
+			rc = 1;
 		}
 		}
-		else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
-		         nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
-			/* Only P6 based Pentium M need to re-unmask
-			 * the apic vector but it doesn't hurt
-			 * other P6 variant */
-			apic_write(APIC_LVTPC, APIC_DM_NMI);
-		}
-		write_watchdog_counter(NULL);
 	}
 	}
+done:
+	return rc;
+}
+
+int do_nmi_callback(struct pt_regs * regs, int cpu)
+{
+#ifdef CONFIG_SYSCTL
+	if (unknown_nmi_panic)
+		return unknown_nmi_panic_callback(regs, cpu);
+#endif
+	return 0;
 }
 }
 
 
 #ifdef CONFIG_SYSCTL
 #ifdef CONFIG_SYSCTL
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
 	unsigned char reason = get_nmi_reason();
 	unsigned char reason = get_nmi_reason();
 	char buf[64];
 	char buf[64];
 
 
-	if (!(reason & 0xc0)) {
-		sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-		die_nmi(regs, buf);
-	}
+	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
+	die_nmi(regs, buf);
 	return 0;
 	return 0;
 }
 }
 
 
 /*
 /*
- * proc handler for /proc/sys/kernel/unknown_nmi_panic
+ * proc handler for /proc/sys/kernel/nmi
  */
  */
-int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
 			void __user *buffer, size_t *length, loff_t *ppos)
 			void __user *buffer, size_t *length, loff_t *ppos)
 {
 {
 	int old_state;
 	int old_state;
 
 
-	old_state = unknown_nmi_panic;
+	nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
+	old_state = nmi_watchdog_enabled;
 	proc_dointvec(table, write, file, buffer, length, ppos);
 	proc_dointvec(table, write, file, buffer, length, ppos);
-	if (!!old_state == !!unknown_nmi_panic)
+	if (!!old_state == !!nmi_watchdog_enabled)
 		return 0;
 		return 0;
 
 
-	if (unknown_nmi_panic) {
-		if (reserve_lapic_nmi() < 0) {
-			unknown_nmi_panic = 0;
-			return -EBUSY;
-		} else {
-			set_nmi_callback(unknown_nmi_panic_callback);
-		}
+	if (atomic_read(&nmi_active) < 0) {
+		printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
+		return -EIO;
+	}
+
+	if (nmi_watchdog == NMI_DEFAULT) {
+		if (nmi_known_cpu() > 0)
+			nmi_watchdog = NMI_LOCAL_APIC;
+		else
+			nmi_watchdog = NMI_IO_APIC;
+	}
+
+	if (nmi_watchdog == NMI_LOCAL_APIC) {
+		if (nmi_watchdog_enabled)
+			enable_lapic_nmi_watchdog();
+		else
+			disable_lapic_nmi_watchdog();
 	} else {
 	} else {
-		release_lapic_nmi();
-		unset_nmi_callback();
+		printk( KERN_WARNING
+			"NMI watchdog doesn't know what hardware to touch\n");
+		return -EIO;
 	}
 	}
 	return 0;
 	return 0;
 }
 }
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
 
 
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_active);
 EXPORT_SYMBOL(nmi_watchdog);
 EXPORT_SYMBOL(nmi_watchdog);
-EXPORT_SYMBOL(reserve_lapic_nmi);
-EXPORT_SYMBOL(release_lapic_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
+EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
+EXPORT_SYMBOL(reserve_perfctr_nmi);
+EXPORT_SYMBOL(release_perfctr_nmi);
+EXPORT_SYMBOL(reserve_evntsel_nmi);
+EXPORT_SYMBOL(release_evntsel_nmi);
 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
 EXPORT_SYMBOL(disable_timer_nmi_watchdog);
 EXPORT_SYMBOL(enable_timer_nmi_watchdog);
 EXPORT_SYMBOL(enable_timer_nmi_watchdog);

+ 3 - 11
arch/i386/kernel/process.c

@@ -37,6 +37,7 @@
 #include <linux/kallsyms.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/random.h>
+#include <linux/personality.h>
 
 
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/pgtable.h>
@@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs)
  * the "args".
  * the "args".
  */
  */
 extern void kernel_thread_helper(void);
 extern void kernel_thread_helper(void);
-__asm__(".section .text\n"
-	".align 4\n"
-	"kernel_thread_helper:\n\t"
-	"movl %edx,%eax\n\t"
-	"pushl %edx\n\t"
-	"call *%ebx\n\t"
-	"pushl %eax\n\t"
-	"call do_exit\n"
-	".previous");
 
 
 /*
 /*
  * Create a kernel thread
  * Create a kernel thread
@@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 	regs.xes = __USER_DS;
 	regs.xes = __USER_DS;
 	regs.orig_eax = -1;
 	regs.orig_eax = -1;
 	regs.eip = (unsigned long) kernel_thread_helper;
 	regs.eip = (unsigned long) kernel_thread_helper;
-	regs.xcs = __KERNEL_CS;
+	regs.xcs = __KERNEL_CS | get_kernel_rpl();
 	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
 	regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
 
 
 	/* Ok, create the new process.. */
 	/* Ok, create the new process.. */
@@ -905,7 +897,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
 
 
 unsigned long arch_align_stack(unsigned long sp)
 unsigned long arch_align_stack(unsigned long sp)
 {
 {
-	if (randomize_va_space)
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
 		sp -= get_random_int() % 8192;
 		sp -= get_random_int() % 8192;
 	return sp & ~0xf;
 	return sp & ~0xf;
 }
 }

+ 5 - 5
arch/i386/kernel/ptrace.c

@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
 	return addr;
 	return addr;
 }
 }
 
 
-static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs)
+static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 {
 {
 	int i, copied;
 	int i, copied;
-	unsigned char opcode[16];
+	unsigned char opcode[15];
 	unsigned long addr = convert_eip_to_linear(child, regs);
 	unsigned long addr = convert_eip_to_linear(child, regs);
 
 
 	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
 	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
 	for (i = 0; i < copied; i++) {
 	for (i = 0; i < copied; i++) {
 		switch (opcode[i]) {
 		switch (opcode[i]) {
-		/* popf */
-		case 0x9d:
+		/* popf and iret */
+		case 0x9d: case 0xcf:
 			return 1;
 			return 1;
 		/* opcode and address size prefixes */
 		/* opcode and address size prefixes */
 		case 0x66: case 0x67:
 		case 0x66: case 0x67:
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child)
 	 * don't mark it as being "us" that set it, so that we
 	 * don't mark it as being "us" that set it, so that we
 	 * won't clear it by hand later.
 	 * won't clear it by hand later.
 	 */
 	 */
-	if (is_at_popf(child, regs))
+	if (is_setting_trap_flag(child, regs))
 		return;
 		return;
 	
 	
 	child->ptrace |= PT_DTRACE;
 	child->ptrace |= PT_DTRACE;

+ 147 - 15
arch/i386/kernel/relocate_kernel.S

@@ -7,16 +7,138 @@
  */
  */
 
 
 #include <linux/linkage.h>
 #include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/kexec.h>
+
+/*
+ * Must be relocatable PIC code callable as a C function
+ */
+
+#define PTR(x) (x << 2)
+#define PAGE_ALIGNED (1 << PAGE_SHIFT)
+#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
+#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
+
+	.text
+	.align PAGE_ALIGNED
+	.globl relocate_kernel
+relocate_kernel:
+	movl	8(%esp), %ebp /* list of pages */
+
+#ifdef CONFIG_X86_PAE
+	/* map the control page at its virtual address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xc0000000, %eax
+	shrl	$27, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PMD_0)(%ebp), %edx
+	orl	$PAE_PGD_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PMD_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x3fe00000, %eax
+	shrl	$18, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_0)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x001ff000, %eax
+	shrl	$9, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	/* identity map the control page at its physical address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xc0000000, %eax
+	shrl	$27, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PMD_1)(%ebp), %edx
+	orl	$PAE_PGD_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PMD_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x3fe00000, %eax
+	shrl	$18, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_1)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x001ff000, %eax
+	shrl	$9, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+#else
+	/* map the control page at its virtual address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xffc00000, %eax
+	shrl	$20, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_0)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_0)(%ebp), %edi
+	movl	PTR(VA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x003ff000, %eax
+	shrl	$10, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	/* identity map the control page at its physical address */
+
+	movl	PTR(VA_PGD)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0xffc00000, %eax
+	shrl	$20, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_PTE_1)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+
+	movl	PTR(VA_PTE_1)(%ebp), %edi
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %eax
+	andl	$0x003ff000, %eax
+	shrl	$10, %eax
+	addl	%edi, %eax
+
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edx
+	orl	$PAGE_ATTR, %edx
+	movl	%edx, (%eax)
+#endif
 
 
-	/*
-	 * Must be relocatable PIC code callable as a C function, that once
-	 * it starts can not use the previous processes stack.
-	 */
-	.globl relocate_new_kernel
 relocate_new_kernel:
 relocate_new_kernel:
 	/* read the arguments and say goodbye to the stack */
 	/* read the arguments and say goodbye to the stack */
 	movl  4(%esp), %ebx /* page_list */
 	movl  4(%esp), %ebx /* page_list */
-	movl  8(%esp), %ebp /* reboot_code_buffer */
+	movl  8(%esp), %ebp /* list of pages */
 	movl  12(%esp), %edx /* start address */
 	movl  12(%esp), %edx /* start address */
 	movl  16(%esp), %ecx /* cpu_has_pae */
 	movl  16(%esp), %ecx /* cpu_has_pae */
 
 
@@ -24,11 +146,26 @@ relocate_new_kernel:
 	pushl $0
 	pushl $0
 	popfl
 	popfl
 
 
-	/* set a new stack at the bottom of our page... */
-	lea   4096(%ebp), %esp
+	/* get physical address of control page now */
+	/* this is impossible after page table switch */
+	movl	PTR(PA_CONTROL_PAGE)(%ebp), %edi
 
 
-	/* store the parameters back on the stack */
-	pushl   %edx /* store the start address */
+	/* switch to new set of page tables */
+	movl	PTR(PA_PGD)(%ebp), %eax
+	movl	%eax, %cr3
+
+	/* setup a new stack at the end of the physical control page */
+	lea	4096(%edi), %esp
+
+	/* jump to identity mapped page */
+	movl    %edi, %eax
+	addl    $(identity_mapped - relocate_kernel), %eax
+	pushl   %eax
+	ret
+
+identity_mapped:
+	/* store the start address on the stack */
+	pushl   %edx
 
 
 	/* Set cr0 to a known state:
 	/* Set cr0 to a known state:
 	 * 31 0 == Paging disabled
 	 * 31 0 == Paging disabled
@@ -113,8 +250,3 @@ relocate_new_kernel:
 	xorl    %edi, %edi
 	xorl    %edi, %edi
 	xorl    %ebp, %ebp
 	xorl    %ebp, %ebp
 	ret
 	ret
-relocate_new_kernel_end:
-
-	.globl relocate_new_kernel_size
-relocate_new_kernel_size:
-	.long relocate_new_kernel_end - relocate_new_kernel

+ 0 - 134
arch/i386/kernel/semaphore.c

@@ -1,134 +0,0 @@
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax whish is either a return
- * value or just clobbered..
- */
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed\n"
-"__down_failed:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"pushl %ebp\n\t"
-	"movl  %esp,%ebp\n\t"
-#endif
-	"pushl %edx\n\t"
-	"pushl %ecx\n\t"
-	"call __down\n\t"
-	"popl %ecx\n\t"
-	"popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"movl %ebp,%esp\n\t"
-	"popl %ebp\n\t"
-#endif
-	"ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_interruptible\n"
-"__down_failed_interruptible:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"pushl %ebp\n\t"
-	"movl  %esp,%ebp\n\t"
-#endif
-	"pushl %edx\n\t"
-	"pushl %ecx\n\t"
-	"call __down_interruptible\n\t"
-	"popl %ecx\n\t"
-	"popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"movl %ebp,%esp\n\t"
-	"popl %ebp\n\t"
-#endif
-	"ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __down_failed_trylock\n"
-"__down_failed_trylock:\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"pushl %ebp\n\t"
-	"movl  %esp,%ebp\n\t"
-#endif
-	"pushl %edx\n\t"
-	"pushl %ecx\n\t"
-	"call __down_trylock\n\t"
-	"popl %ecx\n\t"
-	"popl %edx\n\t"
-#if defined(CONFIG_FRAME_POINTER)
-	"movl %ebp,%esp\n\t"
-	"popl %ebp\n\t"
-#endif
-	"ret"
-);
-
-asm(
-".section .sched.text\n"
-".align 4\n"
-".globl __up_wakeup\n"
-"__up_wakeup:\n\t"
-	"pushl %edx\n\t"
-	"pushl %ecx\n\t"
-	"call __up\n\t"
-	"popl %ecx\n\t"
-	"popl %edx\n\t"
-	"ret"
-);
-
-/*
- * rw spinlock fallbacks
- */
-#if defined(CONFIG_SMP)
-asm(
-".section .sched.text\n"
-".align	4\n"
-".globl	__write_lock_failed\n"
-"__write_lock_failed:\n\t"
-	LOCK_PREFIX "addl	$" RW_LOCK_BIAS_STR ",(%eax)\n"
-"1:	rep; nop\n\t"
-	"cmpl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
-	"jne	1b\n\t"
-	LOCK_PREFIX "subl	$" RW_LOCK_BIAS_STR ",(%eax)\n\t"
-	"jnz	__write_lock_failed\n\t"
-	"ret"
-);
-
-asm(
-".section .sched.text\n"
-".align	4\n"
-".globl	__read_lock_failed\n"
-"__read_lock_failed:\n\t"
-	LOCK_PREFIX "incl	(%eax)\n"
-"1:	rep; nop\n\t"
-	"cmpl	$1,(%eax)\n\t"
-	"js	1b\n\t"
-	LOCK_PREFIX "decl	(%eax)\n\t"
-	"js	__read_lock_failed\n\t"
-	"ret"
-);
-#endif

+ 140 - 253
arch/i386/kernel/setup.c

@@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data);
 
 
 unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features;
 
 
-#ifdef	CONFIG_ACPI
-	int acpi_disabled = 0;
-#else
-	int acpi_disabled = 1;
-#endif
-EXPORT_SYMBOL(acpi_disabled);
-
-#ifdef	CONFIG_ACPI
-int __initdata acpi_force = 0;
-extern acpi_interrupt_flags	acpi_sci_flags;
-#endif
-
 /* for MCA, but anyone else can use it if they want */
 /* for MCA, but anyone else can use it if they want */
 unsigned int machine_id;
 unsigned int machine_id;
 #ifdef CONFIG_MCA
 #ifdef CONFIG_MCA
@@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info);
 struct e820map e820;
 struct e820map e820;
 
 
 extern void early_cpu_init(void);
 extern void early_cpu_init(void);
-extern void generic_apic_probe(char *);
 extern int root_mountflags;
 extern int root_mountflags;
 
 
 unsigned long saved_videomode;
 unsigned long saved_videomode;
@@ -701,238 +688,132 @@ static inline void copy_edd(void)
 }
 }
 #endif
 #endif
 
 
-static void __init parse_cmdline_early (char ** cmdline_p)
-{
-	char c = ' ', *to = command_line, *from = saved_command_line;
-	int len = 0;
-	int userdef = 0;
+static int __initdata user_defined_memmap = 0;
 
 
-	/* Save unparsed command line copy for /proc/cmdline */
-	saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+/*
+ * "mem=nopentium" disables the 4MB page tables.
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
+ *
+ * HPA tells me bootloaders need to parse mem=, so no new
+ * option should be mem=  [also see Documentation/i386/boot.txt]
+ */
+static int __init parse_mem(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
 
 
-	for (;;) {
-		if (c != ' ')
-			goto next_char;
-		/*
-		 * "mem=nopentium" disables the 4MB page tables.
-		 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
-		 * to <mem>, overriding the bios size.
-		 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
-		 * <start> to <start>+<mem>, overriding the bios size.
-		 *
-		 * HPA tells me bootloaders need to parse mem=, so no new
-		 * option should be mem=  [also see Documentation/i386/boot.txt]
+	if (strcmp(arg, "nopentium") == 0) {
+		clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+		disable_pse = 1;
+	} else {
+		/* If the user specifies memory size, we
+		 * limit the BIOS-provided memory map to
+		 * that size. exactmap can be used to specify
+		 * the exact map. mem=number can be used to
+		 * trim the existing memory map.
 		 */
 		 */
-		if (!memcmp(from, "mem=", 4)) {
-			if (to != command_line)
-				to--;
-			if (!memcmp(from+4, "nopentium", 9)) {
-				from += 9+4;
-				clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
-				disable_pse = 1;
-			} else {
-				/* If the user specifies memory size, we
-				 * limit the BIOS-provided memory map to
-				 * that size. exactmap can be used to specify
-				 * the exact map. mem=number can be used to
-				 * trim the existing memory map.
-				 */
-				unsigned long long mem_size;
+		unsigned long long mem_size;
  
  
-				mem_size = memparse(from+4, &from);
-				limit_regions(mem_size);
-				userdef=1;
-			}
-		}
-
-		else if (!memcmp(from, "memmap=", 7)) {
-			if (to != command_line)
-				to--;
-			if (!memcmp(from+7, "exactmap", 8)) {
-#ifdef CONFIG_CRASH_DUMP
-				/* If we are doing a crash dump, we
-				 * still need to know the real mem
-				 * size before original memory map is
-				 * reset.
-				 */
-				find_max_pfn();
-				saved_max_pfn = max_pfn;
-#endif
-				from += 8+7;
-				e820.nr_map = 0;
-				userdef = 1;
-			} else {
-				/* If the user specifies memory size, we
-				 * limit the BIOS-provided memory map to
-				 * that size. exactmap can be used to specify
-				 * the exact map. mem=number can be used to
-				 * trim the existing memory map.
-				 */
-				unsigned long long start_at, mem_size;
- 
-				mem_size = memparse(from+7, &from);
-				if (*from == '@') {
-					start_at = memparse(from+1, &from);
-					add_memory_region(start_at, mem_size, E820_RAM);
-				} else if (*from == '#') {
-					start_at = memparse(from+1, &from);
-					add_memory_region(start_at, mem_size, E820_ACPI);
-				} else if (*from == '$') {
-					start_at = memparse(from+1, &from);
-					add_memory_region(start_at, mem_size, E820_RESERVED);
-				} else {
-					limit_regions(mem_size);
-					userdef=1;
-				}
-			}
-		}
-
-		else if (!memcmp(from, "noexec=", 7))
-			noexec_setup(from + 7);
+		mem_size = memparse(arg, &arg);
+		limit_regions(mem_size);
+		user_defined_memmap = 1;
+	}
+	return 0;
+}
+early_param("mem", parse_mem);
 
 
+static int __init parse_memmap(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
 
 
-#ifdef  CONFIG_X86_SMP
-		/*
-		 * If the BIOS enumerates physical processors before logical,
-		 * maxcpus=N at enumeration-time can be used to disable HT.
+	if (strcmp(arg, "exactmap") == 0) {
+#ifdef CONFIG_CRASH_DUMP
+		/* If we are doing a crash dump, we
+		 * still need to know the real mem
+		 * size before original memory map is
+		 * reset.
 		 */
 		 */
-		else if (!memcmp(from, "maxcpus=", 8)) {
-			extern unsigned int maxcpus;
-
-			maxcpus = simple_strtoul(from + 8, NULL, 0);
-		}
+		find_max_pfn();
+		saved_max_pfn = max_pfn;
 #endif
 #endif
-
-#ifdef CONFIG_ACPI
-		/* "acpi=off" disables both ACPI table parsing and interpreter */
-		else if (!memcmp(from, "acpi=off", 8)) {
-			disable_acpi();
-		}
-
-		/* acpi=force to over-ride black-list */
-		else if (!memcmp(from, "acpi=force", 10)) {
-			acpi_force = 1;
-			acpi_ht = 1;
-			acpi_disabled = 0;
-		}
-
-		/* acpi=strict disables out-of-spec workarounds */
-		else if (!memcmp(from, "acpi=strict", 11)) {
-			acpi_strict = 1;
-		}
-
-		/* Limit ACPI just to boot-time to enable HT */
-		else if (!memcmp(from, "acpi=ht", 7)) {
-			if (!acpi_force)
-				disable_acpi();
-			acpi_ht = 1;
-		}
-		
-		/* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
-		else if (!memcmp(from, "pci=noacpi", 10)) {
-			acpi_disable_pci();
-		}
-		/* "acpi=noirq" disables ACPI interrupt routing */
-		else if (!memcmp(from, "acpi=noirq", 10)) {
-			acpi_noirq_set();
+		e820.nr_map = 0;
+		user_defined_memmap = 1;
+	} else {
+		/* If the user specifies memory size, we
+		 * limit the BIOS-provided memory map to
+		 * that size. exactmap can be used to specify
+		 * the exact map. mem=number can be used to
+		 * trim the existing memory map.
+		 */
+		unsigned long long start_at, mem_size;
+
+		mem_size = memparse(arg, &arg);
+		if (*arg == '@') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_RAM);
+		} else if (*arg == '#') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_ACPI);
+		} else if (*arg == '$') {
+			start_at = memparse(arg+1, &arg);
+			add_memory_region(start_at, mem_size, E820_RESERVED);
+		} else {
+			limit_regions(mem_size);
+			user_defined_memmap = 1;
 		}
 		}
+	}
+	return 0;
+}
+early_param("memmap", parse_memmap);
 
 
-		else if (!memcmp(from, "acpi_sci=edge", 13))
-			acpi_sci_flags.trigger =  1;
-
-		else if (!memcmp(from, "acpi_sci=level", 14))
-			acpi_sci_flags.trigger = 3;
-
-		else if (!memcmp(from, "acpi_sci=high", 13))
-			acpi_sci_flags.polarity = 1;
-
-		else if (!memcmp(from, "acpi_sci=low", 12))
-			acpi_sci_flags.polarity = 3;
-
-#ifdef CONFIG_X86_IO_APIC
-		else if (!memcmp(from, "acpi_skip_timer_override", 24))
-			acpi_skip_timer_override = 1;
-
-		if (!memcmp(from, "disable_timer_pin_1", 19))
-			disable_timer_pin_1 = 1;
-		if (!memcmp(from, "enable_timer_pin_1", 18))
-			disable_timer_pin_1 = -1;
+#ifdef CONFIG_PROC_VMCORE
+/* elfcorehdr= specifies the location of elf core header
+ * stored by the crashed kernel.
+ */
+static int __init parse_elfcorehdr(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
 
 
-		/* disable IO-APIC */
-		else if (!memcmp(from, "noapic", 6))
-			disable_ioapic_setup();
-#endif /* CONFIG_X86_IO_APIC */
-#endif /* CONFIG_ACPI */
+	elfcorehdr_addr = memparse(arg, &arg);
+	return 0;
+}
+early_param("elfcorehdr", parse_elfcorehdr);
+#endif /* CONFIG_PROC_VMCORE */
 
 
-#ifdef CONFIG_X86_LOCAL_APIC
-		/* enable local APIC */
-		else if (!memcmp(from, "lapic", 5))
-			lapic_enable();
+/*
+ * highmem=size forces highmem to be exactly 'size' bytes.
+ * This works even on boxes that have no highmem otherwise.
+ * This also works to reduce highmem size on bigger boxes.
+ */
+static int __init parse_highmem(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
 
 
-		/* disable local APIC */
-		else if (!memcmp(from, "nolapic", 6))
-			lapic_disable();
-#endif /* CONFIG_X86_LOCAL_APIC */
+	highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
+	return 0;
+}
+early_param("highmem", parse_highmem);
 
 
-#ifdef CONFIG_KEXEC
-		/* crashkernel=size@addr specifies the location to reserve for
-		 * a crash kernel.  By reserving this memory we guarantee
-		 * that linux never set's it up as a DMA target.
-		 * Useful for holding code to do something appropriate
-		 * after a kernel panic.
-		 */
-		else if (!memcmp(from, "crashkernel=", 12)) {
-			unsigned long size, base;
-			size = memparse(from+12, &from);
-			if (*from == '@') {
-				base = memparse(from+1, &from);
-				/* FIXME: Do I want a sanity check
-				 * to validate the memory range?
-				 */
-				crashk_res.start = base;
-				crashk_res.end   = base + size - 1;
-			}
-		}
-#endif
-#ifdef CONFIG_PROC_VMCORE
-		/* elfcorehdr= specifies the location of elf core header
-		 * stored by the crashed kernel.
-		 */
-		else if (!memcmp(from, "elfcorehdr=", 11))
-			elfcorehdr_addr = memparse(from+11, &from);
-#endif
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the
+ * vmalloc area - the default is 128m.
+ */
+static int __init parse_vmalloc(char *arg)
+{
+	if (!arg)
+		return -EINVAL;
 
 
-		/*
-		 * highmem=size forces highmem to be exactly 'size' bytes.
-		 * This works even on boxes that have no highmem otherwise.
-		 * This also works to reduce highmem size on bigger boxes.
-		 */
-		else if (!memcmp(from, "highmem=", 8))
-			highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
-	
-		/*
-		 * vmalloc=size forces the vmalloc area to be exactly 'size'
-		 * bytes. This can be used to increase (or decrease) the
-		 * vmalloc area - the default is 128m.
-		 */
-		else if (!memcmp(from, "vmalloc=", 8))
-			__VMALLOC_RESERVE = memparse(from+8, &from);
-
-	next_char:
-		c = *(from++);
-		if (!c)
-			break;
-		if (COMMAND_LINE_SIZE <= ++len)
-			break;
-		*(to++) = c;
-	}
-	*to = '\0';
-	*cmdline_p = command_line;
-	if (userdef) {
-		printk(KERN_INFO "user-defined physical RAM map:\n");
-		print_memory_map("user");
-	}
+	__VMALLOC_RESERVE = memparse(arg, &arg);
+	return 0;
 }
 }
+early_param("vmalloc", parse_vmalloc);
 
 
 /*
 /*
  * reservetop=size reserves a hole at the top of the kernel address space which
  * reservetop=size reserves a hole at the top of the kernel address space which
@@ -1189,6 +1070,14 @@ static unsigned long __init setup_memory(void)
 	}
 	}
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 		pages_to_mb(highend_pfn - highstart_pfn));
+	num_physpages = highend_pfn;
+	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+	num_physpages = max_low_pfn;
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+#endif
+#ifdef CONFIG_FLATMEM
+	max_mapnr = num_physpages;
 #endif
 #endif
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 			pages_to_mb(max_low_pfn));
 			pages_to_mb(max_low_pfn));
@@ -1200,22 +1089,20 @@ static unsigned long __init setup_memory(void)
 
 
 void __init zone_sizes_init(void)
 void __init zone_sizes_init(void)
 {
 {
-	unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-	unsigned int max_dma, low;
-
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-	low = max_low_pfn;
-
-	if (low < max_dma)
-		zones_size[ZONE_DMA] = low;
-	else {
-		zones_size[ZONE_DMA] = max_dma;
-		zones_size[ZONE_NORMAL] = low - max_dma;
 #ifdef CONFIG_HIGHMEM
 #ifdef CONFIG_HIGHMEM
-		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+			virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+			max_low_pfn,
+			highend_pfn};
+	add_active_range(0, 0, highend_pfn);
+#else
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+			virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+			max_low_pfn};
+	add_active_range(0, 0, max_low_pfn);
 #endif
 #endif
-	}
-	free_area_init(zones_size);
+
+	free_area_init_nodes(max_zone_pfns);
 }
 }
 #else
 #else
 extern unsigned long __init setup_memory(void);
 extern unsigned long __init setup_memory(void);
@@ -1518,17 +1405,15 @@ void __init setup_arch(char **cmdline_p)
 	data_resource.start = virt_to_phys(_etext);
 	data_resource.start = virt_to_phys(_etext);
 	data_resource.end = virt_to_phys(_edata)-1;
 	data_resource.end = virt_to_phys(_edata)-1;
 
 
-	parse_cmdline_early(cmdline_p);
+	parse_early_param();
 
 
-#ifdef CONFIG_EARLY_PRINTK
-	{
-		char *s = strstr(*cmdline_p, "earlyprintk=");
-		if (s) {
-			setup_early_printk(strchr(s, '=') + 1);
-			printk("early console enabled\n");
-		}
+	if (user_defined_memmap) {
+		printk(KERN_INFO "user-defined physical RAM map:\n");
+		print_memory_map("user");
 	}
 	}
-#endif
+
+	strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
+	*cmdline_p = command_line;
 
 
 	max_low_pfn = setup_memory();
 	max_low_pfn = setup_memory();
 
 
@@ -1557,7 +1442,7 @@ void __init setup_arch(char **cmdline_p)
 	dmi_scan_machine();
 	dmi_scan_machine();
 
 
 #ifdef CONFIG_X86_GENERICARCH
 #ifdef CONFIG_X86_GENERICARCH
-	generic_apic_probe(*cmdline_p);
+	generic_apic_probe();
 #endif	
 #endif	
 	if (efi_enabled)
 	if (efi_enabled)
 		efi_map_memmap();
 		efi_map_memmap();
@@ -1569,9 +1454,11 @@ void __init setup_arch(char **cmdline_p)
 	acpi_boot_table_init();
 	acpi_boot_table_init();
 #endif
 #endif
 
 
+#ifdef CONFIG_PCI
 #ifdef CONFIG_X86_IO_APIC
 #ifdef CONFIG_X86_IO_APIC
 	check_acpi_pci();	/* Checks more than just ACPI actually */
 	check_acpi_pci();	/* Checks more than just ACPI actually */
 #endif
 #endif
+#endif
 
 
 #ifdef CONFIG_ACPI
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
 	acpi_boot_init();

+ 18 - 1
arch/i386/kernel/smpboot.c

@@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id)
 	 */
 	 */
 	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
 	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
 
 
+		if (num_possible_cpus() == 1)
+			goto valid_k7;
+
 		/* Athlon 660/661 is valid. */	
 		/* Athlon 660/661 is valid. */	
 		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
 		if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
 			goto valid_k7;
 			goto valid_k7;
@@ -1376,7 +1379,8 @@ int __cpu_disable(void)
 	 */
 	 */
 	if (cpu == 0)
 	if (cpu == 0)
 		return -EBUSY;
 		return -EBUSY;
-
+	if (nmi_watchdog == NMI_LOCAL_APIC)
+		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 	clear_local_APIC();
 	/* Allow any queued timer interrupts to get serviced */
 	/* Allow any queued timer interrupts to get serviced */
 	local_irq_enable();
 	local_irq_enable();
@@ -1490,3 +1494,16 @@ void __init smp_intr_init(void)
 	/* IPI for generic function call */
 	/* IPI for generic function call */
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 }
 }
+
+/*
+ * If the BIOS enumerates physical processors before logical,
+ * maxcpus=N at enumeration-time can be used to disable HT.
+ */
+static int __init parse_maxcpus(char *arg)
+{
+	extern unsigned int maxcpus;
+
+	maxcpus = simple_strtoul(arg, NULL, 0);
+	return 0;
+}
+early_param("maxcpus", parse_maxcpus);

+ 2 - 95
arch/i386/kernel/srat.c

@@ -54,8 +54,6 @@ struct node_memory_chunk_s {
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 static struct node_memory_chunk_s node_memory_chunk[MAXCHUNKS];
 
 
 static int num_memory_chunks;		/* total number of memory chunks */
 static int num_memory_chunks;		/* total number of memory chunks */
-static int zholes_size_init;
-static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
 
 extern void * boot_ioremap(unsigned long, unsigned long);
 extern void * boot_ioremap(unsigned long, unsigned long);
 
 
@@ -135,47 +133,6 @@ static void __init parse_memory_affinity_structure (char *sratp)
 		 "enabled and removable" : "enabled" ) );
 		 "enabled and removable" : "enabled" ) );
 }
 }
 
 
-/* Take a chunk of pages from page frame cstart to cend and count the number
- * of pages in each zone, returned via zones[].
- */
-static __init void chunk_to_zones(unsigned long cstart, unsigned long cend, 
-		unsigned long *zones)
-{
-	unsigned long max_dma;
-	extern unsigned long max_low_pfn;
-
-	int z;
-	unsigned long rend;
-
-	/* FIXME: MAX_DMA_ADDRESS and max_low_pfn are trying to provide
-	 * similarly scoped information and should be handled in a consistant
-	 * manner.
-	 */
-	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-	/* Split the hole into the zones in which it falls.  Repeatedly
-	 * take the segment in which the remaining hole starts, round it
-	 * to the end of that zone.
-	 */
-	memset(zones, 0, MAX_NR_ZONES * sizeof(long));
-	while (cstart < cend) {
-		if (cstart < max_dma) {
-			z = ZONE_DMA;
-			rend = (cend < max_dma)? cend : max_dma;
-
-		} else if (cstart < max_low_pfn) {
-			z = ZONE_NORMAL;
-			rend = (cend < max_low_pfn)? cend : max_low_pfn;
-
-		} else {
-			z = ZONE_HIGHMEM;
-			rend = cend;
-		}
-		zones[z] += rend - cstart;
-		cstart = rend;
-	}
-}
-
 /*
 /*
  * The SRAT table always lists ascending addresses, so can always
  * The SRAT table always lists ascending addresses, so can always
  * assume that the first "start" address that you see is the real
  * assume that the first "start" address that you see is the real
@@ -220,7 +177,6 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 
 
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(pxm_bitmap, 0, sizeof(pxm_bitmap));	/* init proximity domain bitmap */
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
 	memset(node_memory_chunk, 0, sizeof(node_memory_chunk));
-	memset(zholes_size, 0, sizeof(zholes_size));
 
 
 	num_memory_chunks = 0;
 	num_memory_chunks = 0;
 	while (p < end) {
 	while (p < end) {
@@ -284,6 +240,7 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
 		node_read_chunk(chunk->nid, chunk);
 		node_read_chunk(chunk->nid, chunk);
+		add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
 	}
 	}
  
  
 	for_each_online_node(nid) {
 	for_each_online_node(nid) {
@@ -392,57 +349,7 @@ int __init get_memcfg_from_srat(void)
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 		return acpi20_parse_srat((struct acpi_table_srat *)header);
 	}
 	}
 out_err:
 out_err:
+	remove_all_active_ranges();
 	printk("failed to get NUMA memory information from SRAT table\n");
 	printk("failed to get NUMA memory information from SRAT table\n");
 	return 0;
 	return 0;
 }
 }
-
-/* For each node run the memory list to determine whether there are
- * any memory holes.  For each hole determine which ZONE they fall
- * into.
- *
- * NOTE#1: this requires knowledge of the zone boundries and so
- * _cannot_ be performed before those are calculated in setup_memory.
- * 
- * NOTE#2: we rely on the fact that the memory chunks are ordered by
- * start pfn number during setup.
- */
-static void __init get_zholes_init(void)
-{
-	int nid;
-	int c;
-	int first;
-	unsigned long end = 0;
-
-	for_each_online_node(nid) {
-		first = 1;
-		for (c = 0; c < num_memory_chunks; c++){
-			if (node_memory_chunk[c].nid == nid) {
-				if (first) {
-					end = node_memory_chunk[c].end_pfn;
-					first = 0;
-
-				} else {
-					/* Record any gap between this chunk
-					 * and the previous chunk on this node
-					 * against the zones it spans.
-					 */
-					chunk_to_zones(end,
-						node_memory_chunk[c].start_pfn,
-						&zholes_size[nid * MAX_NR_ZONES]);
-				}
-			}
-		}
-	}
-}
-
-unsigned long * __init get_zholes_size(int nid)
-{
-	if (!zholes_size_init) {
-		zholes_size_init++;
-		get_zholes_init();
-	}
-	if (nid >= MAX_NUMNODES || !node_online(nid))
-		printk("%s: nid = %d is invalid/offline. num_online_nodes = %d",
-		       __FUNCTION__, nid, num_online_nodes());
-	return &zholes_size[nid * MAX_NR_ZONES];
-}

+ 0 - 98
arch/i386/kernel/stacktrace.c

@@ -1,98 +0,0 @@
-/*
- * arch/i386/kernel/stacktrace.c
- *
- * Stack trace management functions
- *
- *  Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
- */
-#include <linux/sched.h>
-#include <linux/stacktrace.h>
-
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
-{
-	return	p > (void *)tinfo &&
-		p < (void *)tinfo + THREAD_SIZE - 3;
-}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer:
- */
-static inline unsigned long
-save_context_stack(struct stack_trace *trace, unsigned int skip,
-		   struct thread_info *tinfo, unsigned long *stack,
-		   unsigned long ebp)
-{
-	unsigned long addr;
-
-#ifdef CONFIG_FRAME_POINTER
-	while (valid_stack_ptr(tinfo, (void *)ebp)) {
-		addr = *(unsigned long *)(ebp + 4);
-		if (!skip)
-			trace->entries[trace->nr_entries++] = addr;
-		else
-			skip--;
-		if (trace->nr_entries >= trace->max_entries)
-			break;
-		/*
-		 * break out of recursive entries (such as
-		 * end_of_stack_stop_unwind_function):
-	 	 */
-		if (ebp == *(unsigned long *)ebp)
-			break;
-
-		ebp = *(unsigned long *)ebp;
-	}
-#else
-	while (valid_stack_ptr(tinfo, stack)) {
-		addr = *stack++;
-		if (__kernel_text_address(addr)) {
-			if (!skip)
-				trace->entries[trace->nr_entries++] = addr;
-			else
-				skip--;
-			if (trace->nr_entries >= trace->max_entries)
-				break;
-		}
-	}
-#endif
-
-	return ebp;
-}
-
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- * If all_contexts is set, all contexts (hardirq, softirq and process)
- * are saved. If not set then only the current context is saved.
- */
-void save_stack_trace(struct stack_trace *trace,
-		      struct task_struct *task, int all_contexts,
-		      unsigned int skip)
-{
-	unsigned long ebp;
-	unsigned long *stack = &ebp;
-
-	WARN_ON(trace->nr_entries || !trace->max_entries);
-
-	if (!task || task == current) {
-		/* Grab ebp right from our regs: */
-		asm ("movl %%ebp, %0" : "=r" (ebp));
-	} else {
-		/* ebp is the last reg pushed by switch_to(): */
-		ebp = *(unsigned long *) task->thread.esp;
-	}
-
-	while (1) {
-		struct thread_info *context = (struct thread_info *)
-				((unsigned long)stack & (~(THREAD_SIZE - 1)));
-
-		ebp = save_context_stack(trace, skip, context, stack, ebp);
-		stack = (unsigned long *)context->previous_esp;
-		if (!all_contexts || !stack ||
-				trace->nr_entries >= trace->max_entries)
-			break;
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
-		if (trace->nr_entries >= trace->max_entries)
-			break;
-	}
-}
-

+ 1 - 0
arch/i386/kernel/syscall_table.S

@@ -317,3 +317,4 @@ ENTRY(sys_call_table)
 	.long sys_tee			/* 315 */
 	.long sys_tee			/* 315 */
 	.long sys_vmsplice
 	.long sys_vmsplice
 	.long sys_move_pages
 	.long sys_move_pages
+	.long sys_getcpu

+ 19 - 4
arch/i386/kernel/time.c

@@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime)
 
 
 int timer_ack;
 int timer_ack;
 
 
-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
 unsigned long profile_pc(struct pt_regs *regs)
 unsigned long profile_pc(struct pt_regs *regs)
 {
 {
 	unsigned long pc = instruction_pointer(regs);
 	unsigned long pc = instruction_pointer(regs);
 
 
-	if (!user_mode_vm(regs) && in_lock_functions(pc))
+#ifdef CONFIG_SMP
+	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
 		return *(unsigned long *)(regs->ebp + 4);
 		return *(unsigned long *)(regs->ebp + 4);
-
+#else
+		unsigned long *sp;
+		if ((regs->xcs & 3) == 0)
+			sp = (unsigned long *)&regs->esp;
+		else
+			sp = (unsigned long *)regs->esp;
+		/* Return address is either directly at stack pointer
+		   or above a saved eflags. Eflags has bits 22-31 zero,
+		   kernel addresses don't. */
+ 		if (sp[0] >> 22)
+			return sp[0];
+		if (sp[1] >> 22)
+			return sp[1];
+#endif
+	}
+#endif
 	return pc;
 	return pc;
 }
 }
 EXPORT_SYMBOL(profile_pc);
 EXPORT_SYMBOL(profile_pc);
-#endif
 
 
 /*
 /*
  * This is the same as the above, except we _also_ save the current
  * This is the same as the above, except we _also_ save the current

+ 3 - 18
arch/i386/kernel/topology.c

@@ -28,6 +28,7 @@
 #include <linux/init.h>
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/smp.h>
 #include <linux/nodemask.h>
 #include <linux/nodemask.h>
+#include <linux/mmzone.h>
 #include <asm/cpu.h>
 #include <asm/cpu.h>
 
 
 static struct i386_cpu cpu_devices[NR_CPUS];
 static struct i386_cpu cpu_devices[NR_CPUS];
@@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu);
 EXPORT_SYMBOL(arch_unregister_cpu);
 EXPORT_SYMBOL(arch_unregister_cpu);
 #endif /*CONFIG_HOTPLUG_CPU*/
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 
-
-
-#ifdef CONFIG_NUMA
-#include <linux/mmzone.h>
-
 static int __init topology_init(void)
 static int __init topology_init(void)
 {
 {
 	int i;
 	int i;
 
 
+#ifdef CONFIG_NUMA
 	for_each_online_node(i)
 	for_each_online_node(i)
 		register_one_node(i);
 		register_one_node(i);
+#endif /* CONFIG_NUMA */
 
 
 	for_each_present_cpu(i)
 	for_each_present_cpu(i)
 		arch_register_cpu(i);
 		arch_register_cpu(i);
 	return 0;
 	return 0;
 }
 }
 
 
-#else /* !CONFIG_NUMA */
-
-static int __init topology_init(void)
-{
-	int i;
-
-	for_each_present_cpu(i)
-		arch_register_cpu(i);
-	return 0;
-}
-
-#endif /* CONFIG_NUMA */
-
 subsys_initcall(topology_init);
 subsys_initcall(topology_init);

+ 134 - 97
arch/i386/kernel/traps.c

@@ -28,6 +28,7 @@
 #include <linux/kprobes.h>
 #include <linux/kprobes.h>
 #include <linux/kexec.h>
 #include <linux/kexec.h>
 #include <linux/unwind.h>
 #include <linux/unwind.h>
+#include <linux/uaccess.h>
 
 
 #ifdef CONFIG_EISA
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
 #include <linux/ioport.h>
@@ -40,7 +41,6 @@
 
 
 #include <asm/processor.h>
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/system.h>
-#include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/io.h>
 #include <asm/atomic.h>
 #include <asm/atomic.h>
 #include <asm/debugreg.h>
 #include <asm/debugreg.h>
@@ -51,6 +51,7 @@
 #include <asm/smp.h>
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
 #include <asm/arch_hooks.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
+#include <asm/stacktrace.h>
 
 
 #include <linux/module.h>
 #include <linux/module.h>
 
 
@@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 		p < (void *)tinfo + THREAD_SIZE - 3;
 		p < (void *)tinfo + THREAD_SIZE - 3;
 }
 }
 
 
-/*
- * Print one address/symbol entries per line.
- */
-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
-{
-	printk(" [<%08lx>] ", addr);
-
-	print_symbol("%s\n", addr);
-}
-
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				unsigned long *stack, unsigned long ebp,
 				unsigned long *stack, unsigned long ebp,
-				char *log_lvl)
+				struct stacktrace_ops *ops, void *data)
 {
 {
 	unsigned long addr;
 	unsigned long addr;
 
 
 #ifdef	CONFIG_FRAME_POINTER
 #ifdef	CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
 		addr = *(unsigned long *)(ebp + 4);
-		print_addr_and_symbol(addr, log_lvl);
+		ops->address(data, addr);
 		/*
 		/*
 		 * break out of recursive entries (such as
 		 * break out of recursive entries (such as
 		 * end_of_stack_stop_unwind_function):
 		 * end_of_stack_stop_unwind_function):
@@ -150,30 +141,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	while (valid_stack_ptr(tinfo, stack)) {
 	while (valid_stack_ptr(tinfo, stack)) {
 		addr = *stack++;
 		addr = *stack++;
 		if (__kernel_text_address(addr))
 		if (__kernel_text_address(addr))
-			print_addr_and_symbol(addr, log_lvl);
+			ops->address(data, addr);
 	}
 	}
 #endif
 #endif
 	return ebp;
 	return ebp;
 }
 }
 
 
+struct ops_and_data {
+	struct stacktrace_ops *ops;
+	void *data;
+};
+
 static asmlinkage int
 static asmlinkage int
-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+dump_trace_unwind(struct unwind_frame_info *info, void *data)
 {
 {
+	struct ops_and_data *oad = (struct ops_and_data *)data;
 	int n = 0;
 	int n = 0;
 
 
 	while (unwind(info) == 0 && UNW_PC(info)) {
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
 		n++;
-		print_addr_and_symbol(UNW_PC(info), log_lvl);
+		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 		if (arch_unw_user_mode(info))
 			break;
 			break;
 	}
 	}
 	return n;
 	return n;
 }
 }
 
 
-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			       unsigned long *stack, char *log_lvl)
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
+	        unsigned long *stack,
+		struct stacktrace_ops *ops, void *data)
 {
 {
-	unsigned long ebp;
+	unsigned long ebp = 0;
 
 
 	if (!task)
 	if (!task)
 		task = current;
 		task = current;
@@ -181,54 +179,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	if (call_trace >= 0) {
 	if (call_trace >= 0) {
 		int unw_ret = 0;
 		int unw_ret = 0;
 		struct unwind_frame_info info;
 		struct unwind_frame_info info;
+		struct ops_and_data oad = { .ops = ops, .data = data };
 
 
 		if (regs) {
 		if (regs) {
 			if (unwind_init_frame_info(&info, task, regs) == 0)
 			if (unwind_init_frame_info(&info, task, regs) == 0)
-				unw_ret = show_trace_unwind(&info, log_lvl);
+				unw_ret = dump_trace_unwind(&info, &oad);
 		} else if (task == current)
 		} else if (task == current)
-			unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+			unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
 		else {
 		else {
 			if (unwind_init_blocked(&info, task) == 0)
 			if (unwind_init_blocked(&info, task) == 0)
-				unw_ret = show_trace_unwind(&info, log_lvl);
+				unw_ret = dump_trace_unwind(&info, &oad);
 		}
 		}
 		if (unw_ret > 0) {
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				print_symbol("DWARF2 unwinder stuck at %s\n",
+				ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
 					     UNW_PC(&info));
 					     UNW_PC(&info));
 				if (UNW_SP(&info) >= PAGE_OFFSET) {
 				if (UNW_SP(&info) >= PAGE_OFFSET) {
-					printk("Leftover inexact backtrace:\n");
+					ops->warning(data, "Leftover inexact backtrace:\n");
 					stack = (void *)UNW_SP(&info);
 					stack = (void *)UNW_SP(&info);
+					if (!stack)
+						return;
+					ebp = UNW_FP(&info);
 				} else
 				} else
-					printk("Full inexact backtrace again:\n");
+					ops->warning(data, "Full inexact backtrace again:\n");
 			} else if (call_trace >= 1)
 			} else if (call_trace >= 1)
 				return;
 				return;
 			else
 			else
-				printk("Full inexact backtrace again:\n");
+				ops->warning(data, "Full inexact backtrace again:\n");
 		} else
 		} else
-			printk("Inexact backtrace:\n");
+			ops->warning(data, "Inexact backtrace:\n");
+	}
+	if (!stack) {
+		unsigned long dummy;
+		stack = &dummy;
+		if (task && task != current)
+			stack = (unsigned long *)task->thread.esp;
 	}
 	}
 
 
-	if (task == current) {
-		/* Grab ebp right from our regs */
-		asm ("movl %%ebp, %0" : "=r" (ebp) : );
-	} else {
-		/* ebp is the last reg pushed by switch_to */
-		ebp = *(unsigned long *) task->thread.esp;
+#ifdef CONFIG_FRAME_POINTER
+	if (!ebp) {
+		if (task == current) {
+			/* Grab ebp right from our regs */
+			asm ("movl %%ebp, %0" : "=r" (ebp) : );
+		} else {
+			/* ebp is the last reg pushed by switch_to */
+			ebp = *(unsigned long *) task->thread.esp;
+		}
 	}
 	}
+#endif
 
 
 	while (1) {
 	while (1) {
 		struct thread_info *context;
 		struct thread_info *context;
 		context = (struct thread_info *)
 		context = (struct thread_info *)
 			((unsigned long)stack & (~(THREAD_SIZE - 1)));
 			((unsigned long)stack & (~(THREAD_SIZE - 1)));
-		ebp = print_context_stack(context, stack, ebp, log_lvl);
+		ebp = print_context_stack(context, stack, ebp, ops, data);
+		/* Should be after the line below, but somewhere
+		   in early boot context comes out corrupted and we
+		   can't reference it -AK */
+		if (ops->stack(data, "IRQ") < 0)
+			break;
 		stack = (unsigned long*)context->previous_esp;
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 		if (!stack)
 			break;
 			break;
-		printk("%s =======================\n", log_lvl);
 	}
 	}
 }
 }
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	printk(data);
+	print_symbol(msg, symbol);
+	printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+	printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+	return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr)
+{
+	printk("%s [<%08lx>] ", (char *)data, addr);
+	print_symbol("%s\n", addr);
+}
+
+static struct stacktrace_ops print_trace_ops = {
+	.warning = print_trace_warning,
+	.warning_symbol = print_trace_warning_symbol,
+	.stack = print_trace_stack,
+	.address = print_trace_address,
+};
+
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long * stack, char *log_lvl)
+{
+	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
+	printk("%s =======================\n", log_lvl);
+}
 
 
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long * stack)
 {
 {
 	show_trace_log_lvl(task, regs, stack, "");
 	show_trace_log_lvl(task, regs, stack, "");
 }
 }
@@ -291,8 +351,9 @@ void show_registers(struct pt_regs *regs)
 		ss = regs->xss & 0xffff;
 		ss = regs->xss & 0xffff;
 	}
 	}
 	print_modules();
 	print_modules();
-	printk(KERN_EMERG "CPU:    %d\nEIP:    %04x:[<%08lx>]    %s VLI\n"
-			"EFLAGS: %08lx   (%s %.*s) \n",
+	printk(KERN_EMERG "CPU:    %d\n"
+		KERN_EMERG "EIP:    %04x:[<%08lx>]    %s VLI\n"
+		KERN_EMERG "EFLAGS: %08lx   (%s %.*s)\n",
 		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
 		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
 		print_tainted(), regs->eflags, system_utsname.release,
 		print_tainted(), regs->eflags, system_utsname.release,
 		(int)strcspn(system_utsname.version, " "),
 		(int)strcspn(system_utsname.version, " "),
@@ -348,7 +409,7 @@ static void handle_BUG(struct pt_regs *regs)
 
 
 	if (eip < PAGE_OFFSET)
 	if (eip < PAGE_OFFSET)
 		return;
 		return;
-	if (__get_user(ud2, (unsigned short __user *)eip))
+	if (probe_kernel_address((unsigned short __user *)eip, ud2))
 		return;
 		return;
 	if (ud2 != 0x0b0f)
 	if (ud2 != 0x0b0f)
 		return;
 		return;
@@ -361,7 +422,8 @@ static void handle_BUG(struct pt_regs *regs)
 		char *file;
 		char *file;
 		char c;
 		char c;
 
 
-		if (__get_user(line, (unsigned short __user *)(eip + 2)))
+		if (probe_kernel_address((unsigned short __user *)(eip + 2),
+					line))
 			break;
 			break;
 		if (__get_user(file, (char * __user *)(eip + 4)) ||
 		if (__get_user(file, (char * __user *)(eip + 4)) ||
 		    (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
 		    (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
@@ -634,18 +696,24 @@ gp_in_kernel:
 	}
 	}
 }
 }
 
 
-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+mem_parity_error(unsigned char reason, struct pt_regs * regs)
 {
 {
-	printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
-			"to continue\n");
+	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+		"CPU %d.\n", reason, smp_processor_id());
 	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
 	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
 			"chips\n");
 			"chips\n");
+	if (panic_on_unrecovered_nmi)
+                panic("NMI: Not continuing");
+
+	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
 
 	/* Clear and disable the memory parity error line. */
 	/* Clear and disable the memory parity error line. */
 	clear_mem_error(reason);
 	clear_mem_error(reason);
 }
 }
 
 
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+io_check_error(unsigned char reason, struct pt_regs * regs)
 {
 {
 	unsigned long i;
 	unsigned long i;
 
 
@@ -661,7 +729,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
 	outb(reason, 0x61);
 	outb(reason, 0x61);
 }
 }
 
 
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 {
 {
 #ifdef CONFIG_MCA
 #ifdef CONFIG_MCA
 	/* Might actually be able to figure out what the guilty party
 	/* Might actually be able to figure out what the guilty party
@@ -671,15 +740,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 		return;
 		return;
 	}
 	}
 #endif
 #endif
-	printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-		reason, smp_processor_id());
-	printk("Dazed and confused, but trying to continue\n");
-	printk("Do you have a strange power saving mode enabled?\n");
+	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+		"CPU %d.\n", reason, smp_processor_id());
+	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+	if (panic_on_unrecovered_nmi)
+                panic("NMI: Not continuing");
+
+	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 }
 }
 
 
 static DEFINE_SPINLOCK(nmi_print_lock);
 static DEFINE_SPINLOCK(nmi_print_lock);
 
 
-void die_nmi (struct pt_regs *regs, const char *msg)
+void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
 {
 {
 	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
 	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
 	    NOTIFY_STOP)
 	    NOTIFY_STOP)
@@ -711,7 +783,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
 	do_exit(SIGSEGV);
 	do_exit(SIGSEGV);
 }
 }
 
 
-static void default_do_nmi(struct pt_regs * regs)
+static __kprobes void default_do_nmi(struct pt_regs * regs)
 {
 {
 	unsigned char reason = 0;
 	unsigned char reason = 0;
 
 
@@ -728,12 +800,12 @@ static void default_do_nmi(struct pt_regs * regs)
 		 * Ok, so this is none of the documented NMI sources,
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 * so it must be the NMI watchdog.
 		 */
 		 */
-		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+		if (nmi_watchdog_tick(regs, reason))
 			return;
 			return;
-		}
+		if (!do_nmi_callback(regs, smp_processor_id()))
 #endif
 #endif
-		unknown_nmi_error(reason, regs);
+			unknown_nmi_error(reason, regs);
+
 		return;
 		return;
 	}
 	}
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -749,14 +821,7 @@ static void default_do_nmi(struct pt_regs * regs)
 	reassert_nmi();
 	reassert_nmi();
 }
 }
 
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
-	return 0;
-}
- 
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
- 
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
+fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
 {
 	int cpu;
 	int cpu;
 
 
@@ -766,25 +831,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
 
 
 	++nmi_count(cpu);
 	++nmi_count(cpu);
 
 
-	if (!rcu_dereference(nmi_callback)(regs, cpu))
-		default_do_nmi(regs);
+	default_do_nmi(regs);
 
 
 	nmi_exit();
 	nmi_exit();
 }
 }
 
 
-void set_nmi_callback(nmi_callback_t callback)
-{
-	vmalloc_sync_all();
-	rcu_assign_pointer(nmi_callback, callback);
-}
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-
-void unset_nmi_callback(void)
-{
-	nmi_callback = dummy_nmi_callback;
-}
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
 #ifdef CONFIG_KPROBES
 #ifdef CONFIG_KPROBES
 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
 {
@@ -1124,20 +1175,6 @@ void __init trap_init_f00f_bug(void)
 }
 }
 #endif
 #endif
 
 
-#define _set_gate(gate_addr,type,dpl,addr,seg) \
-do { \
-  int __d0, __d1; \
-  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
-	"movw %4,%%dx\n\t" \
-	"movl %%eax,%0\n\t" \
-	"movl %%edx,%1" \
-	:"=m" (*((long *) (gate_addr))), \
-	 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
-	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-	 "3" ((char *) (addr)),"2" ((seg) << 16)); \
-} while (0)
-
-
 /*
 /*
  * This needs to use 'idt_table' rather than 'idt', and
  * This needs to use 'idt_table' rather than 'idt', and
  * thus use the _nonmapped_ version of the IDT, as the
  * thus use the _nonmapped_ version of the IDT, as the
@@ -1146,7 +1183,7 @@ do { \
  */
  */
 void set_intr_gate(unsigned int n, void *addr)
 void set_intr_gate(unsigned int n, void *addr)
 {
 {
-	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
 }
 }
 
 
 /*
 /*
@@ -1154,22 +1191,22 @@ void set_intr_gate(unsigned int n, void *addr)
  */
  */
 static inline void set_system_intr_gate(unsigned int n, void *addr)
 static inline void set_system_intr_gate(unsigned int n, void *addr)
 {
 {
-	_set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
+	_set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
 }
 }
 
 
 static void __init set_trap_gate(unsigned int n, void *addr)
 static void __init set_trap_gate(unsigned int n, void *addr)
 {
 {
-	_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
 }
 }
 
 
 static void __init set_system_gate(unsigned int n, void *addr)
 static void __init set_system_gate(unsigned int n, void *addr)
 {
 {
-	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
 }
 }
 
 
 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
 {
 {
-	_set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
+	_set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
 }
 }
 
 
 
 

+ 1 - 1
arch/i386/kernel/tsc.c

@@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void)
 
 
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 
-void tsc_init(void)
+void __init tsc_init(void)
 {
 {
 	if (!cpu_has_tsc || tsc_disable)
 	if (!cpu_has_tsc || tsc_disable)
 		return;
 		return;

+ 1 - 1
arch/i386/lib/Makefile

@@ -4,6 +4,6 @@
 
 
 
 
 lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
 lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
-	bitops.o
+	bitops.o semaphore.o
 
 
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
 lib-$(CONFIG_X86_USE_3DNOW) += mmx.o

+ 217 - 0
arch/i386/lib/semaphore.S

@@ -0,0 +1,217 @@
+/*
+ * i386 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
+ */
+
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/rwlock.h>
+#include <asm/alternative-asm.i>
+#include <asm/frame.i>
+#include <asm/dwarf2.h>
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %eax contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (%eax, %edx and %ecx) except %eax whish is either a return
+ * value or just clobbered..
+ */
+	.section .sched.text
+ENTRY(__down_failed)
+	CFI_STARTPROC
+	FRAME
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	call __down
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ecx
+	popl %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edx
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__down_failed)
+
+ENTRY(__down_failed_interruptible)
+	CFI_STARTPROC
+	FRAME
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	call __down_interruptible
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ecx
+	popl %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edx
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__down_failed_interruptible)
+
+ENTRY(__down_failed_trylock)
+	CFI_STARTPROC
+	FRAME
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	call __down_trylock
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ecx
+	popl %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edx
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__down_failed_trylock)
+
+ENTRY(__up_wakeup)
+	CFI_STARTPROC
+	FRAME
+	pushl %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	pushl %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	call __up
+	popl %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE ecx
+	popl %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	CFI_RESTORE edx
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__up_wakeup)
+
+/*
+ * rw spinlock fallbacks
+ */
+#ifdef CONFIG_SMP
+ENTRY(__write_lock_failed)
+	CFI_STARTPROC simple
+	FRAME
+2: 	LOCK_PREFIX
+	addl	$ RW_LOCK_BIAS,(%eax)
+1:	rep; nop
+	cmpl	$ RW_LOCK_BIAS,(%eax)
+	jne	1b
+	LOCK_PREFIX
+	subl	$ RW_LOCK_BIAS,(%eax)
+	jnz	2b
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__write_lock_failed)
+
+ENTRY(__read_lock_failed)
+	CFI_STARTPROC
+	FRAME
+2: 	LOCK_PREFIX
+	incl	(%eax)
+1:	rep; nop
+	cmpl	$1,(%eax)
+	js	1b
+	LOCK_PREFIX
+	decl	(%eax)
+	js	2b
+	ENDFRAME
+	ret
+	CFI_ENDPROC
+	END(__read_lock_failed)
+
+#endif
+
+/* Fix up special calling conventions */
+ENTRY(call_rwsem_down_read_failed)
+	CFI_STARTPROC
+	push %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	push %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	call rwsem_down_read_failed
+	pop %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	pop %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	ret
+	CFI_ENDPROC
+	END(call_rwsem_down_read_failed)
+
+ENTRY(call_rwsem_down_write_failed)
+	CFI_STARTPROC
+	push %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	calll rwsem_down_write_failed
+	pop %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	ret
+	CFI_ENDPROC
+	END(call_rwsem_down_write_failed)
+
+ENTRY(call_rwsem_wake)
+	CFI_STARTPROC
+	decw %dx    /* do nothing if still outstanding active readers */
+	jnz 1f
+	push %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	call rwsem_wake
+	pop %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+1:	ret
+	CFI_ENDPROC
+	END(call_rwsem_wake)
+
+/* Fix up special calling conventions */
+ENTRY(call_rwsem_downgrade_wake)
+	CFI_STARTPROC
+	push %ecx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET ecx,0
+	push %edx
+	CFI_ADJUST_CFA_OFFSET 4
+	CFI_REL_OFFSET edx,0
+	call rwsem_downgrade_wake
+	pop %edx
+	CFI_ADJUST_CFA_OFFSET -4
+	pop %ecx
+	CFI_ADJUST_CFA_OFFSET -4
+	ret
+	CFI_ENDPROC
+	END(call_rwsem_downgrade_wake)
+

+ 1 - 0
arch/i386/mach-generic/bigsmp.c

@@ -5,6 +5,7 @@
 #define APIC_DEFINITION 1
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
+#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>

+ 1 - 0
arch/i386/mach-generic/es7000.c

@@ -4,6 +4,7 @@
 #define APIC_DEFINITION 1
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
+#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>

+ 30 - 30
arch/i386/mach-generic/probe.c

@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/init.h>
+#include <linux/errno.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <asm/apicdef.h>
 #include <asm/apicdef.h>
@@ -29,7 +30,24 @@ struct genapic *apic_probe[] __initdata = {
 	NULL,
 	NULL,
 };
 };
 
 
-static int cmdline_apic;
+static int cmdline_apic __initdata;
+static int __init parse_apic(char *arg)
+{
+	int i;
+
+	if (!arg)
+		return -EINVAL;
+
+	for (i = 0; apic_probe[i]; i++) {
+		if (!strcmp(apic_probe[i]->name, arg)) {
+			genapic = apic_probe[i];
+			cmdline_apic = 1;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+early_param("apic", parse_apic);
 
 
 void __init generic_bigsmp_probe(void)
 void __init generic_bigsmp_probe(void)
 {
 {
@@ -48,40 +66,20 @@ void __init generic_bigsmp_probe(void)
 		}
 		}
 }
 }
 
 
-void __init generic_apic_probe(char *command_line) 
+void __init generic_apic_probe(void)
 { 
 { 
-	char *s;
-	int i;
-	int changed = 0;
-
-	s = strstr(command_line, "apic=");
-	if (s && (s == command_line || isspace(s[-1]))) { 
-		char *p = strchr(s, ' '), old; 
-		if (!p)
-			p = strchr(s, '\0'); 
-		old = *p; 
-		*p = 0; 
-		for (i = 0; !changed && apic_probe[i]; i++) {
-			if (!strcmp(apic_probe[i]->name, s+5)) { 
-				changed = 1;
+	if (!cmdline_apic) {
+		int i;
+		for (i = 0; apic_probe[i]; i++) {
+			if (apic_probe[i]->probe()) {
 				genapic = apic_probe[i];
 				genapic = apic_probe[i];
+				break;
 			}
 			}
 		}
 		}
-		if (!changed)
-			printk(KERN_ERR "Unknown genapic `%s' specified.\n", s);
-		*p = old;
-		cmdline_apic = changed;
-	} 
-	for (i = 0; !changed && apic_probe[i]; i++) { 
-		if (apic_probe[i]->probe()) {
-			changed = 1;
-			genapic = apic_probe[i]; 
-		} 
+		/* Not visible without early console */
+		if (!apic_probe[i])
+			panic("Didn't find an APIC driver");
 	}
 	}
-	/* Not visible without early console */ 
-	if (!changed) 
-		panic("Didn't find an APIC driver"); 
-
 	printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
 	printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
 } 
 } 
 
 
@@ -119,7 +117,9 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;	
 	return 0;	
 }
 }
 
 
+#ifdef CONFIG_SMP
 int hard_smp_processor_id(void)
 int hard_smp_processor_id(void)
 {
 {
 	return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
 	return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
 }
 }
+#endif

+ 1 - 0
arch/i386/mach-generic/summit.c

@@ -4,6 +4,7 @@
 #define APIC_DEFINITION 1
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/threads.h>
 #include <linux/cpumask.h>
 #include <linux/cpumask.h>
+#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/fixmap.h>

+ 22 - 52
arch/i386/mm/discontig.c

@@ -157,21 +157,6 @@ static void __init find_max_pfn_node(int nid)
 		BUG();
 		BUG();
 }
 }
 
 
-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-	int nid;
-
-	for_each_node(nid) {
-		if (node_end_pfn[nid] == 0)
-			break;
-		if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-			return nid;
-	}
-
-	return 0;
-}
-
 /* 
 /* 
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
  * method.  For node zero take this from the bottom of memory, for
@@ -227,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void)
 	unsigned long pfn;
 	unsigned long pfn;
 
 
 	for_each_online_node(nid) {
 	for_each_online_node(nid) {
+		unsigned old_end_pfn = node_end_pfn[nid];
+
 		/*
 		/*
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * The acpi/srat node info can show hot-add memroy zones
 		 * where memory could be added but not currently present.
 		 * where memory could be added but not currently present.
@@ -276,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void)
 
 
 		node_end_pfn[nid] -= size;
 		node_end_pfn[nid] -= size;
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
 		node_remap_start_pfn[nid] = node_end_pfn[nid];
+		shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
 	}
 	}
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 	printk("Reserving total of %ld pages for numa KVA remap\n",
 			reserve_pages);
 			reserve_pages);
@@ -322,6 +310,11 @@ unsigned long __init setup_memory(void)
 		highstart_pfn = system_max_low_pfn;
 		highstart_pfn = system_max_low_pfn;
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 	       pages_to_mb(highend_pfn - highstart_pfn));
 	       pages_to_mb(highend_pfn - highstart_pfn));
+	num_physpages = highend_pfn;
+	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+	num_physpages = system_max_low_pfn;
+	high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
 #endif
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
 			pages_to_mb(system_max_low_pfn));
 			pages_to_mb(system_max_low_pfn));
@@ -364,45 +357,22 @@ void __init numa_kva_reserve(void)
 void __init zone_sizes_init(void)
 void __init zone_sizes_init(void)
 {
 {
 	int nid;
 	int nid;
-
-
-	for_each_online_node(nid) {
-		unsigned long zones_size[MAX_NR_ZONES] = {0, };
-		unsigned long *zholes_size;
-		unsigned int max_dma;
-
-		unsigned long low = max_low_pfn;
-		unsigned long start = node_start_pfn[nid];
-		unsigned long high = node_end_pfn[nid];
-
-		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-		if (node_has_online_mem(nid)){
-			if (start > low) {
-#ifdef CONFIG_HIGHMEM
-				BUG_ON(start > high);
-				zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-			} else {
-				if (low < max_dma)
-					zones_size[ZONE_DMA] = low;
-				else {
-					BUG_ON(max_dma > low);
-					BUG_ON(low > high);
-					zones_size[ZONE_DMA] = max_dma;
-					zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-					zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-				}
-			}
+	unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+		virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+		max_low_pfn,
+		highend_pfn
+	};
+
+	/* If SRAT has not registered memory, register it now */
+	if (find_max_pfn_with_active_regions() == 0) {
+		for_each_online_node(nid) {
+			if (node_has_online_mem(nid))
+				add_active_range(nid, node_start_pfn[nid],
+							node_end_pfn[nid]);
 		}
 		}
-
-		zholes_size = get_zholes_size(nid);
-
-		free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-				zholes_size);
 	}
 	}
+
+	free_area_init_nodes(max_zone_pfns);
 	return;
 	return;
 }
 }
 
 

+ 1 - 1
arch/i386/mm/extable.c

@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
 	const struct exception_table_entry *fixup;
 	const struct exception_table_entry *fixup;
 
 
 #ifdef CONFIG_PNPBIOS
 #ifdef CONFIG_PNPBIOS
-	if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
+	if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
 	{
 	{
 		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
 		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
 		extern u32 pnp_bios_is_utter_crap;
 		extern u32 pnp_bios_is_utter_crap;

+ 8 - 17
arch/i386/mm/fault.c

@@ -27,21 +27,24 @@
 #include <asm/uaccess.h>
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/desc.h>
 #include <asm/kdebug.h>
 #include <asm/kdebug.h>
+#include <asm/segment.h>
 
 
 extern void die(const char *,struct pt_regs *,long);
 extern void die(const char *,struct pt_regs *,long);
 
 
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
 int register_page_fault_notifier(struct notifier_block *nb)
 int register_page_fault_notifier(struct notifier_block *nb)
 {
 {
 	vmalloc_sync_all();
 	vmalloc_sync_all();
 	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
 	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
 }
 }
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
 
 
 int unregister_page_fault_notifier(struct notifier_block *nb)
 int unregister_page_fault_notifier(struct notifier_block *nb)
 {
 {
 	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
 	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
 }
 }
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
 
 
 static inline int notify_page_fault(enum die_val val, const char *str,
 static inline int notify_page_fault(enum die_val val, const char *str,
 			struct pt_regs *regs, long err, int trap, int sig)
 			struct pt_regs *regs, long err, int trap, int sig)
@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum die_val val, const char *str,
 	};
 	};
 	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
 	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
 }
 }
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
-{
-	return NOTIFY_DONE;
-}
-#endif
-
 
 
 /*
 /*
  * Unlock any spinlocks which will prevent us from getting the
  * Unlock any spinlocks which will prevent us from getting the
@@ -119,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
 	}
 	}
 
 
 	/* The standard kernel/user address space limit. */
 	/* The standard kernel/user address space limit. */
-	*eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
+	*eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
 	
 	
 	/* By far the most common cases. */
 	/* By far the most common cases. */
-	if (likely(seg == __USER_CS || seg == __KERNEL_CS))
+	if (likely(SEGMENT_IS_FLAT_CODE(seg)))
 		return eip;
 		return eip;
 
 
 	/* Check the segment exists, is within the current LDT/GDT size,
 	/* Check the segment exists, is within the current LDT/GDT size,
@@ -436,11 +431,7 @@ good_area:
 	write = 0;
 	write = 0;
 	switch (error_code & 3) {
 	switch (error_code & 3) {
 		default:	/* 3: write, present */
 		default:	/* 3: write, present */
-#ifdef TEST_VERIFY_AREA
-			if (regs->cs == KERNEL_CS)
-				printk("WP fault at %08lx\n", regs->eip);
-#endif
-			/* fall through */
+				/* fall through */
 		case 2:		/* write, not present */
 		case 2:		/* write, not present */
 			if (!(vma->vm_flags & VM_WRITE))
 			if (!(vma->vm_flags & VM_WRITE))
 				goto bad_area;
 				goto bad_area;

+ 1 - 1
arch/i386/mm/highmem.c

@@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 	enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
 
 
-	if (vaddr < FIXADDR_START) { // FIXME
+	if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
 		dec_preempt_count();
 		dec_preempt_count();
 		preempt_check_resched();
 		preempt_check_resched();
 		return;
 		return;

+ 12 - 26
arch/i386/mm/init.c

@@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
  * on      Enable
  * on      Enable
  * off     Disable
  * off     Disable
  */
  */
-void __init noexec_setup(const char *str)
+static int __init noexec_setup(char *str)
 {
 {
-	if (!strncmp(str, "on",2) && cpu_has_nx) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str,"off",3)) {
+	if (!str || !strcmp(str, "on")) {
+		if (cpu_has_nx) {
+			__supported_pte_mask |= _PAGE_NX;
+			disable_nx = 0;
+		}
+	} else if (!strcmp(str,"off")) {
 		disable_nx = 1;
 		disable_nx = 1;
 		__supported_pte_mask &= ~_PAGE_NX;
 		__supported_pte_mask &= ~_PAGE_NX;
-	}
+	} else
+		return -EINVAL;
+
+	return 0;
 }
 }
+early_param("noexec", noexec_setup);
 
 
 int nx_enabled = 0;
 int nx_enabled = 0;
 #ifdef CONFIG_X86_PAE
 #ifdef CONFIG_X86_PAE
@@ -552,18 +558,6 @@ static void __init test_wp_bit(void)
 	}
 	}
 }
 }
 
 
-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
-	num_physpages = highend_pfn;
-#else
-	num_physpages = max_low_pfn;
-#endif
-#ifdef CONFIG_FLATMEM
-	max_mapnr = num_physpages;
-#endif
-}
-
 static struct kcore_list kcore_mem, kcore_vmalloc; 
 static struct kcore_list kcore_mem, kcore_vmalloc; 
 
 
 void __init mem_init(void)
 void __init mem_init(void)
@@ -590,14 +584,6 @@ void __init mem_init(void)
 	}
 	}
 #endif
 #endif
  
  
-	set_max_mapnr_init();
-
-#ifdef CONFIG_HIGHMEM
-	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
-	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
-
 	/* this will put all low memory onto the freelists */
 	/* this will put all low memory onto the freelists */
 	totalram_pages += free_all_bootmem();
 	totalram_pages += free_all_bootmem();
 
 

+ 59 - 29
arch/i386/oprofile/nmi_int.c

@@ -17,14 +17,15 @@
 #include <asm/nmi.h>
 #include <asm/nmi.h>
 #include <asm/msr.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
+#include <asm/kdebug.h>
  
  
 #include "op_counter.h"
 #include "op_counter.h"
 #include "op_x86_model.h"
 #include "op_x86_model.h"
- 
+
 static struct op_x86_model_spec const * model;
 static struct op_x86_model_spec const * model;
 static struct op_msrs cpu_msrs[NR_CPUS];
 static struct op_msrs cpu_msrs[NR_CPUS];
 static unsigned long saved_lvtpc[NR_CPUS];
 static unsigned long saved_lvtpc[NR_CPUS];
- 
+
 static int nmi_start(void);
 static int nmi_start(void);
 static void nmi_stop(void);
 static void nmi_stop(void);
 
 
@@ -82,13 +83,24 @@ static void exit_driverfs(void)
 #define exit_driverfs() do { } while (0)
 #define exit_driverfs() do { } while (0)
 #endif /* CONFIG_PM */
 #endif /* CONFIG_PM */
 
 
-
-static int nmi_callback(struct pt_regs * regs, int cpu)
+static int profile_exceptions_notify(struct notifier_block *self,
+				     unsigned long val, void *data)
 {
 {
-	return model->check_ctrs(regs, &cpu_msrs[cpu]);
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+	int cpu = smp_processor_id();
+
+	switch(val) {
+	case DIE_NMI:
+		if (model->check_ctrs(args->regs, &cpu_msrs[cpu]))
+			ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
 }
 }
- 
- 
+
 static void nmi_cpu_save_registers(struct op_msrs * msrs)
 static void nmi_cpu_save_registers(struct op_msrs * msrs)
 {
 {
 	unsigned int const nr_ctrs = model->num_counters;
 	unsigned int const nr_ctrs = model->num_counters;
@@ -98,15 +110,19 @@ static void nmi_cpu_save_registers(struct op_msrs * msrs)
 	unsigned int i;
 	unsigned int i;
 
 
 	for (i = 0; i < nr_ctrs; ++i) {
 	for (i = 0; i < nr_ctrs; ++i) {
-		rdmsr(counters[i].addr,
-			counters[i].saved.low,
-			counters[i].saved.high);
+		if (counters[i].addr){
+			rdmsr(counters[i].addr,
+				counters[i].saved.low,
+				counters[i].saved.high);
+		}
 	}
 	}
  
  
 	for (i = 0; i < nr_ctrls; ++i) {
 	for (i = 0; i < nr_ctrls; ++i) {
-		rdmsr(controls[i].addr,
-			controls[i].saved.low,
-			controls[i].saved.high);
+		if (controls[i].addr){
+			rdmsr(controls[i].addr,
+				controls[i].saved.low,
+				controls[i].saved.high);
+		}
 	}
 	}
 }
 }
 
 
@@ -170,27 +186,29 @@ static void nmi_cpu_setup(void * dummy)
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 }
 
 
+static struct notifier_block profile_exceptions_nb = {
+	.notifier_call = profile_exceptions_notify,
+	.next = NULL,
+	.priority = 0
+};
 
 
 static int nmi_setup(void)
 static int nmi_setup(void)
 {
 {
+	int err=0;
+
 	if (!allocate_msrs())
 	if (!allocate_msrs())
 		return -ENOMEM;
 		return -ENOMEM;
 
 
-	/* We walk a thin line between law and rape here.
-	 * We need to be careful to install our NMI handler
-	 * without actually triggering any NMIs as this will
-	 * break the core code horrifically.
-	 */
-	if (reserve_lapic_nmi() < 0) {
+	if ((err = register_die_notifier(&profile_exceptions_nb))){
 		free_msrs();
 		free_msrs();
-		return -EBUSY;
+		return err;
 	}
 	}
+
 	/* We need to serialize save and setup for HT because the subset
 	/* We need to serialize save and setup for HT because the subset
 	 * of msrs are distinct for save and setup operations
 	 * of msrs are distinct for save and setup operations
 	 */
 	 */
 	on_each_cpu(nmi_save_registers, NULL, 0, 1);
 	on_each_cpu(nmi_save_registers, NULL, 0, 1);
 	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
 	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
-	set_nmi_callback(nmi_callback);
 	nmi_enabled = 1;
 	nmi_enabled = 1;
 	return 0;
 	return 0;
 }
 }
@@ -205,15 +223,19 @@ static void nmi_restore_registers(struct op_msrs * msrs)
 	unsigned int i;
 	unsigned int i;
 
 
 	for (i = 0; i < nr_ctrls; ++i) {
 	for (i = 0; i < nr_ctrls; ++i) {
-		wrmsr(controls[i].addr,
-			controls[i].saved.low,
-			controls[i].saved.high);
+		if (controls[i].addr){
+			wrmsr(controls[i].addr,
+				controls[i].saved.low,
+				controls[i].saved.high);
+		}
 	}
 	}
  
  
 	for (i = 0; i < nr_ctrs; ++i) {
 	for (i = 0; i < nr_ctrs; ++i) {
-		wrmsr(counters[i].addr,
-			counters[i].saved.low,
-			counters[i].saved.high);
+		if (counters[i].addr){
+			wrmsr(counters[i].addr,
+				counters[i].saved.low,
+				counters[i].saved.high);
+		}
 	}
 	}
 }
 }
  
  
@@ -234,6 +256,7 @@ static void nmi_cpu_shutdown(void * dummy)
 	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
 	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
 	apic_write(APIC_LVTERR, v);
 	apic_write(APIC_LVTERR, v);
 	nmi_restore_registers(msrs);
 	nmi_restore_registers(msrs);
+	model->shutdown(msrs);
 }
 }
 
 
  
  
@@ -241,8 +264,7 @@ static void nmi_shutdown(void)
 {
 {
 	nmi_enabled = 0;
 	nmi_enabled = 0;
 	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
 	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
-	unset_nmi_callback();
-	release_lapic_nmi();
+	unregister_die_notifier(&profile_exceptions_nb);
 	free_msrs();
 	free_msrs();
 }
 }
 
 
@@ -284,6 +306,14 @@ static int nmi_create_files(struct super_block * sb, struct dentry * root)
 		struct dentry * dir;
 		struct dentry * dir;
 		char buf[4];
 		char buf[4];
  
  
+ 		/* quick little hack to _not_ expose a counter if it is not
+		 * available for use.  This should protect userspace app.
+		 * NOTE:  assumes 1:1 mapping here (that counters are organized
+		 *        sequentially in their struct assignment).
+		 */
+		if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i)))
+			continue;
+
 		snprintf(buf,  sizeof(buf), "%d", i);
 		snprintf(buf,  sizeof(buf), "%d", i);
 		dir = oprofilefs_mkdir(sb, root, buf);
 		dir = oprofilefs_mkdir(sb, root, buf);
 		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 
 		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 

+ 25 - 10
arch/i386/oprofile/nmi_timer_int.c

@@ -17,34 +17,49 @@
 #include <asm/nmi.h>
 #include <asm/nmi.h>
 #include <asm/apic.h>
 #include <asm/apic.h>
 #include <asm/ptrace.h>
 #include <asm/ptrace.h>
+#include <asm/kdebug.h>
  
  
-static int nmi_timer_callback(struct pt_regs * regs, int cpu)
+static int profile_timer_exceptions_notify(struct notifier_block *self,
+					   unsigned long val, void *data)
 {
 {
-	oprofile_add_sample(regs, 0);
-	return 1;
+	struct die_args *args = (struct die_args *)data;
+	int ret = NOTIFY_DONE;
+
+	switch(val) {
+	case DIE_NMI:
+		oprofile_add_sample(args->regs, 0);
+		ret = NOTIFY_STOP;
+		break;
+	default:
+		break;
+	}
+	return ret;
 }
 }
 
 
+static struct notifier_block profile_timer_exceptions_nb = {
+	.notifier_call = profile_timer_exceptions_notify,
+	.next = NULL,
+	.priority = 0
+};
+
 static int timer_start(void)
 static int timer_start(void)
 {
 {
-	disable_timer_nmi_watchdog();
-	set_nmi_callback(nmi_timer_callback);
+	if (register_die_notifier(&profile_timer_exceptions_nb))
+		return 1;
 	return 0;
 	return 0;
 }
 }
 
 
 
 
 static void timer_stop(void)
 static void timer_stop(void)
 {
 {
-	enable_timer_nmi_watchdog();
-	unset_nmi_callback();
+	unregister_die_notifier(&profile_timer_exceptions_nb);
 	synchronize_sched();  /* Allow already-started NMIs to complete. */
 	synchronize_sched();  /* Allow already-started NMIs to complete. */
 }
 }
 
 
 
 
 int __init op_nmi_timer_init(struct oprofile_operations * ops)
 int __init op_nmi_timer_init(struct oprofile_operations * ops)
 {
 {
-	extern int nmi_active;
-
-	if (nmi_active <= 0)
+	if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
 		return -ENODEV;
 		return -ENODEV;
 
 
 	ops->start = timer_start;
 	ops->start = timer_start;

+ 42 - 12
arch/i386/oprofile/op_model_athlon.c

@@ -21,10 +21,12 @@
 #define NUM_COUNTERS 4
 #define NUM_COUNTERS 4
 #define NUM_CONTROLS 4
 #define NUM_CONTROLS 4
 
 
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -40,15 +42,21 @@ static unsigned long reset_value[NUM_COUNTERS];
  
  
 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 static void athlon_fill_in_addresses(struct op_msrs * const msrs)
 {
 {
-	msrs->counters[0].addr = MSR_K7_PERFCTR0;
-	msrs->counters[1].addr = MSR_K7_PERFCTR1;
-	msrs->counters[2].addr = MSR_K7_PERFCTR2;
-	msrs->counters[3].addr = MSR_K7_PERFCTR3;
-
-	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
-	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
-	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
-	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+	int i;
+
+	for (i=0; i < NUM_COUNTERS; i++) {
+		if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
+			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
+		else
+			msrs->counters[i].addr = 0;
+	}
+
+	for (i=0; i < NUM_CONTROLS; i++) {
+		if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
+			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
+		else
+			msrs->controls[i].addr = 0;
+	}
 }
 }
 
 
  
  
@@ -59,19 +67,23 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
  
  
 	/* clear all counters */
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
 		CTRL_WRITE(low, high, msrs, i);
 	}
 	}
-	
+
 	/* avoid a false detection of ctr overflows in NMI handler */
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+			continue;
 		CTR_WRITE(1, msrs, i);
 		CTR_WRITE(1, msrs, i);
 	}
 	}
 
 
 	/* enable active counters */
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 			reset_value[i] = counter_config[i].count;
 
 
 			CTR_WRITE(counter_config[i].count, msrs, i);
 			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -98,6 +110,8 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
 	int i;
 	int i;
 
 
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTR_READ(low, high, msrs, i);
 		CTR_READ(low, high, msrs, i);
 		if (CTR_OVERFLOWED(low)) {
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
 			oprofile_add_sample(regs, i);
@@ -132,12 +146,27 @@ static void athlon_stop(struct op_msrs const * const msrs)
 	/* Subtle: stop on all counters to avoid race with
 	/* Subtle: stop on all counters to avoid race with
 	 * setting our pm callback */
 	 * setting our pm callback */
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
 	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_READ(low, high, msrs, i);
 		CTRL_SET_INACTIVE(low);
 		CTRL_SET_INACTIVE(low);
 		CTRL_WRITE(low, high, msrs, i);
 		CTRL_WRITE(low, high, msrs, i);
 	}
 	}
 }
 }
 
 
+static void athlon_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+	}
+	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+}
 
 
 struct op_x86_model_spec const op_athlon_spec = {
 struct op_x86_model_spec const op_athlon_spec = {
 	.num_counters = NUM_COUNTERS,
 	.num_counters = NUM_COUNTERS,
@@ -146,5 +175,6 @@ struct op_x86_model_spec const op_athlon_spec = {
 	.setup_ctrs = &athlon_setup_ctrs,
 	.setup_ctrs = &athlon_setup_ctrs,
 	.check_ctrs = &athlon_check_ctrs,
 	.check_ctrs = &athlon_check_ctrs,
 	.start = &athlon_start,
 	.start = &athlon_start,
-	.stop = &athlon_stop
+	.stop = &athlon_stop,
+	.shutdown = &athlon_shutdown
 };
 };

+ 74 - 78
arch/i386/oprofile/op_model_p4.c

@@ -32,7 +32,7 @@
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
 
 
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
-
+static unsigned int num_controls = NUM_CONTROLS_NON_HT;
 
 
 /* this has to be checked dynamically since the
 /* this has to be checked dynamically since the
    hyper-threadedness of a chip is discovered at
    hyper-threadedness of a chip is discovered at
@@ -40,8 +40,10 @@ static unsigned int num_counters = NUM_COUNTERS_NON_HT;
 static inline void setup_num_counters(void)
 static inline void setup_num_counters(void)
 {
 {
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
-	if (smp_num_siblings == 2)
+	if (smp_num_siblings == 2){
 		num_counters = NUM_COUNTERS_HT2;
 		num_counters = NUM_COUNTERS_HT2;
+		num_controls = NUM_CONTROLS_HT2;
+	}
 #endif
 #endif
 }
 }
 
 
@@ -97,15 +99,6 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
 
 
 #define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
 #define NUM_UNUSED_CCCRS	NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
 
 
-/* All cccr we don't use. */
-static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
-	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
-	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
-	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
-	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
-	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
-};
-
 /* p4 event codes in libop/op_event.h are indices into this table. */
 /* p4 event codes in libop/op_event.h are indices into this table. */
 
 
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
 static struct p4_event_binding p4_events[NUM_EVENTS] = {
@@ -372,6 +365,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = {
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
 
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
 #define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
 #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
 #define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
 #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
@@ -401,29 +396,34 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT];
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 static void p4_fill_in_addresses(struct op_msrs * const msrs)
 {
 {
 	unsigned int i; 
 	unsigned int i; 
-	unsigned int addr, stag;
+	unsigned int addr, cccraddr, stag;
 
 
 	setup_num_counters();
 	setup_num_counters();
 	stag = get_stagger();
 	stag = get_stagger();
 
 
-	/* the counter registers we pay attention to */
+	/* initialize some registers */
 	for (i = 0; i < num_counters; ++i) {
 	for (i = 0; i < num_counters; ++i) {
-		msrs->counters[i].addr = 
-			p4_counters[VIRT_CTR(stag, i)].counter_address;
+		msrs->counters[i].addr = 0;
 	}
 	}
-
-	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
-
-	/* 18 CCCR registers */
-	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
-	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+	for (i = 0; i < num_controls; ++i) {
+		msrs->controls[i].addr = 0;
 	}
 	}
 	
 	
+	/* the counter & cccr registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
+		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
+		if (reserve_perfctr_nmi(addr)){
+			msrs->counters[i].addr = addr;
+			msrs->controls[i].addr = cccraddr;
+		}
+	}
+
 	/* 43 ESCR registers in three or four discontiguous group */
 	/* 43 ESCR registers in three or four discontiguous group */
 	for (addr = MSR_P4_BSU_ESCR0 + stag;
 	for (addr = MSR_P4_BSU_ESCR0 + stag;
 	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
 	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 	}
 
 
 	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
 	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
@@ -431,47 +431,57 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs)
 	if (boot_cpu_data.x86_model >= 0x3) {
 	if (boot_cpu_data.x86_model >= 0x3) {
 		for (addr = MSR_P4_BSU_ESCR0 + stag;
 		for (addr = MSR_P4_BSU_ESCR0 + stag;
 		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
 		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 		}
 	} else {
 	} else {
 		for (addr = MSR_P4_IQ_ESCR0 + stag;
 		for (addr = MSR_P4_IQ_ESCR0 + stag;
 		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
 		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
-			msrs->controls[i].addr = addr;
+			if (reserve_evntsel_nmi(addr))
+				msrs->controls[i].addr = addr;
 		}
 		}
 	}
 	}
 
 
 	for (addr = MSR_P4_RAT_ESCR0 + stag;
 	for (addr = MSR_P4_RAT_ESCR0 + stag;
 	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
 	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 	}
 	
 	
 	for (addr = MSR_P4_MS_ESCR0 + stag;
 	for (addr = MSR_P4_MS_ESCR0 + stag;
 	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
 	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 	}
 	
 	
 	for (addr = MSR_P4_IX_ESCR0 + stag;
 	for (addr = MSR_P4_IX_ESCR0 + stag;
 	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
 	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
-		msrs->controls[i].addr = addr;
+		if (reserve_evntsel_nmi(addr))
+			msrs->controls[i].addr = addr;
 	}
 	}
 
 
 	/* there are 2 remaining non-contiguously located ESCRs */
 	/* there are 2 remaining non-contiguously located ESCRs */
 
 
 	if (num_counters == NUM_COUNTERS_NON_HT) {		
 	if (num_counters == NUM_COUNTERS_NON_HT) {		
 		/* standard non-HT CPUs handle both remaining ESCRs*/
 		/* standard non-HT CPUs handle both remaining ESCRs*/
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 
 	} else if (stag == 0) {
 	} else if (stag == 0) {
 		/* HT CPUs give the first remainder to the even thread, as
 		/* HT CPUs give the first remainder to the even thread, as
 		   the 32nd control register */
 		   the 32nd control register */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
 
 
 	} else {
 	} else {
 		/* and two copies of the second to the odd thread,
 		/* and two copies of the second to the odd thread,
 		   for the 22st and 23nd control registers */
 		   for the 22st and 23nd control registers */
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
-		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		}
 	}
 	}
 }
 }
 
 
@@ -544,7 +554,6 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 {
 {
 	unsigned int i;
 	unsigned int i;
 	unsigned int low, high;
 	unsigned int low, high;
-	unsigned int addr;
 	unsigned int stag;
 	unsigned int stag;
 
 
 	stag = get_stagger();
 	stag = get_stagger();
@@ -557,59 +566,24 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs)
 
 
 	/* clear the cccrs we will use */
 	/* clear the cccrs we will use */
 	for (i = 0 ; i < num_counters ; i++) {
 	for (i = 0 ; i < num_counters ; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		CCCR_CLEAR(low);
 		CCCR_CLEAR(low);
 		CCCR_SET_REQUIRED_BITS(low);
 		CCCR_SET_REQUIRED_BITS(low);
 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
 	}
 	}
 
 
-	/* clear cccrs outside our concern */
-	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
-		rdmsr(p4_unused_cccr[i], low, high);
-		CCCR_CLEAR(low);
-		CCCR_SET_REQUIRED_BITS(low);
-		wrmsr(p4_unused_cccr[i], low, high);
-	}
-
 	/* clear all escrs (including those outside our concern) */
 	/* clear all escrs (including those outside our concern) */
-	for (addr = MSR_P4_BSU_ESCR0 + stag;
-	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-
-	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
-	if (boot_cpu_data.x86_model < 0x3) {
-		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
-		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
-	}
-
-	for (addr = MSR_P4_RAT_ESCR0 + stag;
-	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
-		wrmsr(addr, 0, 0);
-	}
-	
-	for (addr = MSR_P4_MS_ESCR0 + stag;
-	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
-		wrmsr(addr, 0, 0);
-	}
-	
-	for (addr = MSR_P4_IX_ESCR0 + stag;
-	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
-		wrmsr(addr, 0, 0);
+	for (i = num_counters; i < num_controls; i++) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
+		wrmsr(msrs->controls[i].addr, 0, 0);
 	}
 	}
 
 
-	if (num_counters == NUM_COUNTERS_NON_HT) {		
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	} else if (stag == 0) {
-		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
-	} else {
-		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
-	}		
-	
 	/* setup all counters */
 	/* setup all counters */
 	for (i = 0 ; i < num_counters ; ++i) {
 	for (i = 0 ; i < num_counters ; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 			reset_value[i] = counter_config[i].count;
 			pmc_setup_one_p4_counter(i);
 			pmc_setup_one_p4_counter(i);
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
 			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
@@ -696,12 +670,32 @@ static void p4_stop(struct op_msrs const * const msrs)
 	stag = get_stagger();
 	stag = get_stagger();
 
 
 	for (i = 0; i < num_counters; ++i) {
 	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
 		CCCR_READ(low, high, VIRT_CTR(stag, i));
 		CCCR_READ(low, high, VIRT_CTR(stag, i));
 		CCCR_SET_DISABLE(low);
 		CCCR_SET_DISABLE(low);
 		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
 	}
 	}
 }
 }
 
 
+static void p4_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(msrs->counters[i].addr);
+	}
+	/* some of the control registers are specially reserved in
+	 * conjunction with the counter registers (hence the starting offset).
+	 * This saves a few bits.
+	 */
+	for (i = num_counters ; i < num_controls ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(msrs->controls[i].addr);
+	}
+}
+
 
 
 #ifdef CONFIG_SMP
 #ifdef CONFIG_SMP
 struct op_x86_model_spec const op_p4_ht2_spec = {
 struct op_x86_model_spec const op_p4_ht2_spec = {
@@ -711,7 +705,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
 };
 #endif
 #endif
 
 
@@ -722,5 +717,6 @@ struct op_x86_model_spec const op_p4_spec = {
 	.setup_ctrs = &p4_setup_ctrs,
 	.setup_ctrs = &p4_setup_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.check_ctrs = &p4_check_ctrs,
 	.start = &p4_start,
 	.start = &p4_start,
-	.stop = &p4_stop
+	.stop = &p4_stop,
+	.shutdown = &p4_shutdown
 };
 };

+ 53 - 12
arch/i386/oprofile/op_model_ppro.c

@@ -22,10 +22,12 @@
 #define NUM_COUNTERS 2
 #define NUM_COUNTERS 2
 #define NUM_CONTROLS 2
 #define NUM_CONTROLS 2
 
 
+#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
 #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 #define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
 
 
+#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
 #define CTRL_SET_ACTIVE(n) (n |= (1<<22))
@@ -41,11 +43,21 @@ static unsigned long reset_value[NUM_COUNTERS];
  
  
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 static void ppro_fill_in_addresses(struct op_msrs * const msrs)
 {
 {
-	msrs->counters[0].addr = MSR_P6_PERFCTR0;
-	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	int i;
+
+	for (i=0; i < NUM_COUNTERS; i++) {
+		if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
+			msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
+		else
+			msrs->counters[i].addr = 0;
+	}
 	
 	
-	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
-	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+	for (i=0; i < NUM_CONTROLS; i++) {
+		if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
+			msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
+		else
+			msrs->controls[i].addr = 0;
+	}
 }
 }
 
 
 
 
@@ -56,6 +68,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 
 
 	/* clear all counters */
 	/* clear all counters */
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
 	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+			continue;
 		CTRL_READ(low, high, msrs, i);
 		CTRL_READ(low, high, msrs, i);
 		CTRL_CLEAR(low);
 		CTRL_CLEAR(low);
 		CTRL_WRITE(low, high, msrs, i);
 		CTRL_WRITE(low, high, msrs, i);
@@ -63,12 +77,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	
 	
 	/* avoid a false detection of ctr overflows in NMI handler */
 	/* avoid a false detection of ctr overflows in NMI handler */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+			continue;
 		CTR_WRITE(1, msrs, i);
 		CTR_WRITE(1, msrs, i);
 	}
 	}
 
 
 	/* enable active counters */
 	/* enable active counters */
 	for (i = 0; i < NUM_COUNTERS; ++i) {
 	for (i = 0; i < NUM_COUNTERS; ++i) {
-		if (counter_config[i].enabled) {
+		if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
 			reset_value[i] = counter_config[i].count;
 			reset_value[i] = counter_config[i].count;
 
 
 			CTR_WRITE(counter_config[i].count, msrs, i);
 			CTR_WRITE(counter_config[i].count, msrs, i);
@@ -81,6 +97,8 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_UM(low, counter_config[i].unit_mask);
 			CTRL_SET_EVENT(low, counter_config[i].event);
 			CTRL_SET_EVENT(low, counter_config[i].event);
 			CTRL_WRITE(low, high, msrs, i);
 			CTRL_WRITE(low, high, msrs, i);
+		} else {
+			reset_value[i] = 0;
 		}
 		}
 	}
 	}
 }
 }
@@ -93,6 +111,8 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 	int i;
 	int i;
  
  
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
 	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		if (!reset_value[i])
+			continue;
 		CTR_READ(low, high, msrs, i);
 		CTR_READ(low, high, msrs, i);
 		if (CTR_OVERFLOWED(low)) {
 		if (CTR_OVERFLOWED(low)) {
 			oprofile_add_sample(regs, i);
 			oprofile_add_sample(regs, i);
@@ -118,18 +138,38 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
 static void ppro_start(struct op_msrs const * const msrs)
 static void ppro_start(struct op_msrs const * const msrs)
 {
 {
 	unsigned int low,high;
 	unsigned int low,high;
-	CTRL_READ(low, high, msrs, 0);
-	CTRL_SET_ACTIVE(low);
-	CTRL_WRITE(low, high, msrs, 0);
+
+	if (reset_value[0]) {
+		CTRL_READ(low, high, msrs, 0);
+		CTRL_SET_ACTIVE(low);
+		CTRL_WRITE(low, high, msrs, 0);
+	}
 }
 }
 
 
 
 
 static void ppro_stop(struct op_msrs const * const msrs)
 static void ppro_stop(struct op_msrs const * const msrs)
 {
 {
 	unsigned int low,high;
 	unsigned int low,high;
-	CTRL_READ(low, high, msrs, 0);
-	CTRL_SET_INACTIVE(low);
-	CTRL_WRITE(low, high, msrs, 0);
+
+	if (reset_value[0]) {
+		CTRL_READ(low, high, msrs, 0);
+		CTRL_SET_INACTIVE(low);
+		CTRL_WRITE(low, high, msrs, 0);
+	}
+}
+
+static void ppro_shutdown(struct op_msrs const * const msrs)
+{
+	int i;
+
+	for (i = 0 ; i < NUM_COUNTERS ; ++i) {
+		if (CTR_IS_RESERVED(msrs,i))
+			release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
+	}
+	for (i = 0 ; i < NUM_CONTROLS ; ++i) {
+		if (CTRL_IS_RESERVED(msrs,i))
+			release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
+	}
 }
 }
 
 
 
 
@@ -140,5 +180,6 @@ struct op_x86_model_spec const op_ppro_spec = {
 	.setup_ctrs = &ppro_setup_ctrs,
 	.setup_ctrs = &ppro_setup_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.check_ctrs = &ppro_check_ctrs,
 	.start = &ppro_start,
 	.start = &ppro_start,
-	.stop = &ppro_stop
+	.stop = &ppro_stop,
+	.shutdown = &ppro_shutdown
 };
 };

+ 1 - 0
arch/i386/oprofile/op_x86_model.h

@@ -40,6 +40,7 @@ struct op_x86_model_spec {
 		struct op_msrs const * const msrs);
 		struct op_msrs const * const msrs);
 	void (*start)(struct op_msrs const * const msrs);
 	void (*start)(struct op_msrs const * const msrs);
 	void (*stop)(struct op_msrs const * const msrs);
 	void (*stop)(struct op_msrs const * const msrs);
+	void (*shutdown)(struct op_msrs const * const msrs);
 };
 };
 
 
 extern struct op_x86_model_spec const op_ppro_spec;
 extern struct op_x86_model_spec const op_ppro_spec;

+ 1 - 1
arch/i386/pci/Makefile

@@ -11,4 +11,4 @@ pci-y				+= legacy.o irq.o
 pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
 pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
 pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
 pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
 
 
-obj-y				+= $(pci-y) common.o
+obj-y				+= $(pci-y) common.o early.o

+ 4 - 0
arch/i386/pci/common.c

@@ -242,6 +242,10 @@ char * __devinit  pcibios_setup(char *str)
 		acpi_noirq_set();
 		acpi_noirq_set();
 		return NULL;
 		return NULL;
 	}
 	}
+	else if (!strcmp(str, "noearly")) {
+		pci_probe |= PCI_PROBE_NOEARLY;
+		return NULL;
+	}
 #ifndef CONFIG_X86_VISWS
 #ifndef CONFIG_X86_VISWS
 	else if (!strcmp(str, "usepirqmask")) {
 	else if (!strcmp(str, "usepirqmask")) {
 		pci_probe |= PCI_USE_PIRQ_MASK;
 		pci_probe |= PCI_USE_PIRQ_MASK;

+ 16 - 9
arch/i386/pci/direct.c

@@ -254,7 +254,16 @@ static int __init pci_check_type2(void)
 	return works;
 	return works;
 }
 }
 
 
-void __init pci_direct_init(void)
+void __init pci_direct_init(int type)
+{
+	printk(KERN_INFO "PCI: Using configuration type %d\n", type);
+	if (type == 1)
+		raw_pci_ops = &pci_direct_conf1;
+	else
+		raw_pci_ops = &pci_direct_conf2;
+}
+
+int __init pci_direct_probe(void)
 {
 {
 	struct resource *region, *region2;
 	struct resource *region, *region2;
 
 
@@ -264,19 +273,16 @@ void __init pci_direct_init(void)
 	if (!region)
 	if (!region)
 		goto type2;
 		goto type2;
 
 
-	if (pci_check_type1()) {
-		printk(KERN_INFO "PCI: Using configuration type 1\n");
-		raw_pci_ops = &pci_direct_conf1;
-		return;
-	}
+	if (pci_check_type1())
+		return 1;
 	release_resource(region);
 	release_resource(region);
 
 
  type2:
  type2:
 	if ((pci_probe & PCI_PROBE_CONF2) == 0)
 	if ((pci_probe & PCI_PROBE_CONF2) == 0)
-		return;
+		return 0;
 	region = request_region(0xCF8, 4, "PCI conf2");
 	region = request_region(0xCF8, 4, "PCI conf2");
 	if (!region)
 	if (!region)
-		return;
+		return 0;
 	region2 = request_region(0xC000, 0x1000, "PCI conf2");
 	region2 = request_region(0xC000, 0x1000, "PCI conf2");
 	if (!region2)
 	if (!region2)
 		goto fail2;
 		goto fail2;
@@ -284,10 +290,11 @@ void __init pci_direct_init(void)
 	if (pci_check_type2()) {
 	if (pci_check_type2()) {
 		printk(KERN_INFO "PCI: Using configuration type 2\n");
 		printk(KERN_INFO "PCI: Using configuration type 2\n");
 		raw_pci_ops = &pci_direct_conf2;
 		raw_pci_ops = &pci_direct_conf2;
-		return;
+		return 2;
 	}
 	}
 
 
 	release_resource(region2);
 	release_resource(region2);
  fail2:
  fail2:
 	release_resource(region);
 	release_resource(region);
+	return 0;
 }
 }

+ 52 - 0
arch/i386/pci/early.c

@@ -0,0 +1,52 @@
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <asm/pci-direct.h>
+#include <asm/io.h>
+#include "pci.h"
+
+/* Direct PCI access. This is used for PCI accesses in early boot before
+   the PCI subsystem works. */
+
+#define PDprintk(x...)
+
+u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
+{
+	u32 v;
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	v = inl(0xcfc);
+	if (v != 0xffffffff)
+		PDprintk("%x reading 4 from %x: %x\n", slot, offset, v);
+	return v;
+}
+
+u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset)
+{
+	u8 v;
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	v = inb(0xcfc + (offset&3));
+	PDprintk("%x reading 1 from %x: %x\n", slot, offset, v);
+	return v;
+}
+
+u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset)
+{
+	u16 v;
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	v = inw(0xcfc + (offset&2));
+	PDprintk("%x reading 2 from %x: %x\n", slot, offset, v);
+	return v;
+}
+
+void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset,
+				    u32 val)
+{
+	PDprintk("%x writing to %x: %x\n", slot, offset, val);
+	outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8);
+	outl(val, 0xcfc);
+}
+
+int early_pci_allowed(void)
+{
+	return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
+			PCI_PROBE_CONF1;
+}

+ 7 - 2
arch/i386/pci/init.c

@@ -6,8 +6,13 @@
    in the right sequence from here. */
    in the right sequence from here. */
 static __init int pci_access_init(void)
 static __init int pci_access_init(void)
 {
 {
+	int type = 0;
+
+#ifdef CONFIG_PCI_DIRECT
+	type = pci_direct_probe();
+#endif
 #ifdef CONFIG_PCI_MMCONFIG
 #ifdef CONFIG_PCI_MMCONFIG
-	pci_mmcfg_init();
+	pci_mmcfg_init(type);
 #endif
 #endif
 	if (raw_pci_ops)
 	if (raw_pci_ops)
 		return 0;
 		return 0;
@@ -21,7 +26,7 @@ static __init int pci_access_init(void)
 	 * fails.
 	 * fails.
 	 */
 	 */
 #ifdef CONFIG_PCI_DIRECT
 #ifdef CONFIG_PCI_DIRECT
-	pci_direct_init();
+	pci_direct_init(type);
 #endif
 #endif
 	return 0;
 	return 0;
 }
 }

+ 39 - 2
arch/i386/pci/mmconfig.c

@@ -151,6 +151,38 @@ static struct pci_raw_ops pci_mmcfg = {
 	.write =	pci_mmcfg_write,
 	.write =	pci_mmcfg_write,
 };
 };
 
 
+
+static __init void pci_mmcfg_insert_resources(void)
+{
+#define PCI_MMCFG_RESOURCE_NAME_LEN 19
+	int i;
+	struct resource *res;
+	char *names;
+	unsigned num_buses;
+
+	res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
+			pci_mmcfg_config_num, GFP_KERNEL);
+
+	if (!res) {
+		printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
+		return;
+	}
+
+	names = (void *)&res[pci_mmcfg_config_num];
+	for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
+		num_buses = pci_mmcfg_config[i].end_bus_number -
+		    pci_mmcfg_config[i].start_bus_number + 1;
+		res->name = names;
+		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
+			pci_mmcfg_config[i].pci_segment_group_number);
+		res->start = pci_mmcfg_config[i].base_address;
+		res->end = res->start + (num_buses << 20) - 1;
+		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		insert_resource(&iomem_resource, res);
+		names += PCI_MMCFG_RESOURCE_NAME_LEN;
+	}
+}
+
 /* K8 systems have some devices (typically in the builtin northbridge)
 /* K8 systems have some devices (typically in the builtin northbridge)
    that are only accessible using type1
    that are only accessible using type1
    Normally this can be expressed in the MCFG by not listing them
    Normally this can be expressed in the MCFG by not listing them
@@ -187,7 +219,9 @@ static __init void unreachable_devices(void)
 	}
 	}
 }
 }
 
 
-void __init pci_mmcfg_init(void)
+
+
+void __init pci_mmcfg_init(int type)
 {
 {
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 	if ((pci_probe & PCI_PROBE_MMCONF) == 0)
 		return;
 		return;
@@ -198,7 +232,9 @@ void __init pci_mmcfg_init(void)
 	    (pci_mmcfg_config[0].base_address == 0))
 	    (pci_mmcfg_config[0].base_address == 0))
 		return;
 		return;
 
 
-	if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
+	/* Only do this check when type 1 works. If it doesn't work
+	   assume we run on a Mac and always use MCFG */
+	if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].base_address,
 			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
 			pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
 			E820_RESERVED)) {
 			E820_RESERVED)) {
 		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
 		printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
@@ -212,4 +248,5 @@ void __init pci_mmcfg_init(void)
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
 
 
 	unreachable_devices();
 	unreachable_devices();
+	pci_mmcfg_insert_resources();
 }
 }

+ 5 - 2
arch/i386/pci/pci.h

@@ -17,6 +17,7 @@
 #define PCI_PROBE_CONF2		0x0004
 #define PCI_PROBE_CONF2		0x0004
 #define PCI_PROBE_MMCONF	0x0008
 #define PCI_PROBE_MMCONF	0x0008
 #define PCI_PROBE_MASK		0x000f
 #define PCI_PROBE_MASK		0x000f
+#define PCI_PROBE_NOEARLY	0x0010
 
 
 #define PCI_NO_SORT		0x0100
 #define PCI_NO_SORT		0x0100
 #define PCI_BIOS_SORT		0x0200
 #define PCI_BIOS_SORT		0x0200
@@ -81,7 +82,9 @@ extern int pci_conf1_write(unsigned int seg, unsigned int bus,
 extern int pci_conf1_read(unsigned int seg, unsigned int bus,
 extern int pci_conf1_read(unsigned int seg, unsigned int bus,
 			  unsigned int devfn, int reg, int len, u32 *value);
 			  unsigned int devfn, int reg, int len, u32 *value);
 
 
-extern void pci_direct_init(void);
+extern int pci_direct_probe(void);
+extern void pci_direct_init(int type);
 extern void pci_pcbios_init(void);
 extern void pci_pcbios_init(void);
-extern void pci_mmcfg_init(void);
+extern void pci_mmcfg_init(int type);
 extern void pcibios_sort(void);
 extern void pcibios_sort(void);
+

+ 11 - 0
arch/ia64/Kconfig

@@ -356,6 +356,9 @@ config NODES_SHIFT
 	  MAX_NUMNODES will be 2^(This value).
 	  MAX_NUMNODES will be 2^(This value).
 	  If in doubt, use the default.
 	  If in doubt, use the default.
 
 
+config ARCH_POPULATES_NODE_MAP
+	def_bool y
+
 # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
 # VIRTUAL_MEM_MAP and FLAT_NODE_MEM_MAP are functionally equivalent.
 # VIRTUAL_MEM_MAP has been retained for historical reasons.
 # VIRTUAL_MEM_MAP has been retained for historical reasons.
 config VIRTUAL_MEM_MAP
 config VIRTUAL_MEM_MAP
@@ -420,6 +423,14 @@ config IA64_PALINFO
 config SGI_SN
 config SGI_SN
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
 	def_bool y if (IA64_SGI_SN2 || IA64_GENERIC)
 
 
+config IA64_ESI
+	bool "ESI (Extensible SAL Interface) support"
+	help
+	  If you say Y here, support is built into the kernel to
+	  make ESI calls.  ESI calls are used to support vendor-specific
+	  firmware extensions, such as the ability to inject memory-errors
+	  for test-purposes.  If you're unsure, say N.
+
 source "drivers/sn/Kconfig"
 source "drivers/sn/Kconfig"
 
 
 source "drivers/firmware/Kconfig"
 source "drivers/firmware/Kconfig"

+ 1 - 1
arch/ia64/ia32/sys_ia32.c

@@ -1942,7 +1942,7 @@ struct sysctl32 {
 	unsigned int	__unused[4];
 	unsigned int	__unused[4];
 };
 };
 
 
-#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_SYSCTL_SYSCALL
 asmlinkage long
 asmlinkage long
 sys32_sysctl (struct sysctl32 __user *args)
 sys32_sysctl (struct sysctl32 __user *args)
 {
 {

+ 5 - 0
arch/ia64/kernel/Makefile

@@ -32,6 +32,11 @@ obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
 
 
+obj-$(CONFIG_IA64_ESI)		+= esi.o
+ifneq ($(CONFIG_IA64_ESI),)
+obj-y				+= esi_stub.o	# must be in kernel proper
+endif
+
 # The gate DSO image is built using a special linker script.
 # The gate DSO image is built using a special linker script.
 targets += gate.so gate-syms.o
 targets += gate.so gate-syms.o
 
 

+ 2 - 2
arch/ia64/kernel/entry.S

@@ -1605,8 +1605,8 @@ sys_call_table:
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_ni_syscall			// 1295 reserved for ppoll
 	data8 sys_unshare
 	data8 sys_unshare
 	data8 sys_splice
 	data8 sys_splice
-	data8 sys_ni_syscall			// reserved for set_robust_list
-	data8 sys_ni_syscall			// reserved for get_robust_list
+	data8 sys_set_robust_list
+	data8 sys_get_robust_list
 	data8 sys_sync_file_range		// 1300
 	data8 sys_sync_file_range		// 1300
 	data8 sys_tee
 	data8 sys_tee
 	data8 sys_vmsplice
 	data8 sys_vmsplice

+ 205 - 0
arch/ia64/kernel/esi.c

@@ -0,0 +1,205 @@
+/*
+ * Extensible SAL Interface (ESI) support routines.
+ *
+ * Copyright (C) 2006 Hewlett-Packard Co
+ * 	Alex Williamson <alex.williamson@hp.com>
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <asm/esi.h>
+#include <asm/sal.h>
+
+MODULE_AUTHOR("Alex Williamson <alex.williamson@hp.com>");
+MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support");
+MODULE_LICENSE("GPL");
+
+#define MODULE_NAME	"esi"
+
+#define ESI_TABLE_GUID					\
+    EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3,		\
+	     0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4)
+
+enum esi_systab_entry_type {
+	ESI_DESC_ENTRY_POINT = 0
+};
+
+/*
+ * Entry type:	Size:
+ *	0	48
+ */
+#define ESI_DESC_SIZE(type)	"\060"[(unsigned) (type)]
+
+typedef struct ia64_esi_desc_entry_point {
+	u8 type;
+	u8 reserved1[15];
+	u64 esi_proc;
+	u64 gp;
+	efi_guid_t guid;
+} ia64_esi_desc_entry_point_t;
+
+struct pdesc {
+	void *addr;
+	void *gp;
+};
+
+static struct ia64_sal_systab *esi_systab;
+
+static int __init esi_init (void)
+{
+	efi_config_table_t *config_tables;
+	struct ia64_sal_systab *systab;
+	unsigned long esi = 0;
+	char *p;
+	int i;
+
+	config_tables = __va(efi.systab->tables);
+
+	for (i = 0; i < (int) efi.systab->nr_tables; ++i) {
+		if (efi_guidcmp(config_tables[i].guid, ESI_TABLE_GUID) == 0) {
+			esi = config_tables[i].table;
+			break;
+		}
+	}
+
+	if (!esi)
+		return -ENODEV;;
+
+	systab = __va(esi);
+
+	if (strncmp(systab->signature, "ESIT", 4) != 0) {
+		printk(KERN_ERR "bad signature in ESI system table!");
+		return -ENODEV;
+	}
+
+	p = (char *) (systab + 1);
+	for (i = 0; i < systab->entry_count; i++) {
+		/*
+		 * The first byte of each entry type contains the type
+		 * descriptor.
+		 */
+		switch (*p) {
+		      case ESI_DESC_ENTRY_POINT:
+			break;
+		      default:
+			printk(KERN_WARNING "Unkown table type %d found in "
+			       "ESI table, ignoring rest of table\n", *p);
+			return -ENODEV;
+		}
+
+		p += ESI_DESC_SIZE(*p);
+	}
+
+	esi_systab = systab;
+	return 0;
+}
+
+
+int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+		   enum esi_proc_type proc_type, u64 func,
+		   u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
+		   u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags = 0;
+	int i;
+	char *p;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = __va(esi->esi_proc);
+				pdesc.gp = __va(esi->gp);
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				ia64_save_scratch_fpregs(fr);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_lock_irqsave(&sal_lock, flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_save(flags);
+				else
+					preempt_disable();
+				*isrvp = (*esi_proc)(func, arg1, arg2, arg3,
+						     arg4, arg5, arg6, arg7);
+				if (proc_type == ESI_PROC_SERIALIZED)
+					spin_unlock_irqrestore(&sal_lock,
+							       flags);
+				else if (proc_type == ESI_PROC_MP_SAFE)
+					local_irq_restore(flags);
+				else
+					preempt_enable();
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call);
+
+int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp,
+			u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4,
+			u64 arg5, u64 arg6, u64 arg7)
+{
+	struct ia64_fpreg fr[6];
+	unsigned long flags;
+	u64 esi_params[8];
+	char *p;
+	int i;
+
+	if (!esi_systab)
+		return -1;
+
+	p = (char *) (esi_systab + 1);
+	for (i = 0; i < esi_systab->entry_count; i++) {
+		if (*p == ESI_DESC_ENTRY_POINT) {
+			ia64_esi_desc_entry_point_t *esi = (void *)p;
+			if (!efi_guidcmp(guid, esi->guid)) {
+				ia64_sal_handler esi_proc;
+				struct pdesc pdesc;
+
+				pdesc.addr = (void *)esi->esi_proc;
+				pdesc.gp = (void *)esi->gp;
+
+				esi_proc = (ia64_sal_handler) &pdesc;
+
+				esi_params[0] = func;
+				esi_params[1] = arg1;
+				esi_params[2] = arg2;
+				esi_params[3] = arg3;
+				esi_params[4] = arg4;
+				esi_params[5] = arg5;
+				esi_params[6] = arg6;
+				esi_params[7] = arg7;
+				ia64_save_scratch_fpregs(fr);
+				spin_lock_irqsave(&sal_lock, flags);
+				*isrvp = esi_call_phys(esi_proc, esi_params);
+				spin_unlock_irqrestore(&sal_lock, flags);
+				ia64_load_scratch_fpregs(fr);
+				return 0;
+			}
+		}
+		p += ESI_DESC_SIZE(*p);
+	}
+	return -1;
+}
+EXPORT_SYMBOL_GPL(ia64_esi_call_phys);
+
+static void __exit esi_exit (void)
+{
+}
+
+module_init(esi_init);
+module_exit(esi_exit);	/* makes module removable... */

Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно