Browse Source

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux

Pull powerpc updates from Michael Ellerman:
 "Here's a first pull request for powerpc updates for 3.18.

  The bulk of the additions are for the "cxl" driver, for IBM's Coherent
  Accelerator Processor Interface (CAPI).  Most of it's in drivers/misc,
  which Greg & Arnd maintain, Greg said he was happy for us to take it
  through our tree.

  There's the usual minor cleanups and fixes, including a bit of noise
  in drivers from some of those.  A bunch of updates to our EEH code,
  which has been getting more testing.  Several nice speedups from
  Anton, including 20% in clear_page().

  And a bunch of updates for freescale from Scott"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux: (130 commits)
  cxl: Fix afu_read() not doing finish_wait() on signal or non-blocking
  cxl: Add documentation for userspace APIs
  cxl: Add driver to Kbuild and Makefiles
  cxl: Add userspace header file
  cxl: Driver code for powernv PCIe based cards for userspace access
  cxl: Add base builtin support
  powerpc/mm: Add hooks for cxl
  powerpc/opal: Add PHB to cxl mode call
  powerpc/mm: Add new hash_page_mm()
  powerpc/powerpc: Add new PCIe functions for allocating cxl interrupts
  cxl: Add new header for call backs and structs
  powerpc/powernv: Split out set MSI IRQ chip code
  powerpc/mm: Export mmu_kernel_ssize and mmu_linear_psize
  powerpc/msi: Improve IRQ bitmap allocator
  powerpc/cell: Make spu_flush_all_slbs() generic
  powerpc/cell: Move data segment faulting code out of cell platform
  powerpc/cell: Move spu_handle_mm_fault() out of cell platform
  powerpc/pseries: Use new defines when calling H_SET_MODE
  powerpc: Update contact info in Documentation files
  powerpc/perf/hv-24x7: Simplify catalog_read()
  ...
Linus Torvalds 10 years ago
parent
commit
fd9879b9bb
100 changed files with 2836 additions and 1070 deletions
  1. 3 3
      Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
  2. 6 6
      Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
  3. 129 0
      Documentation/ABI/testing/sysfs-class-cxl
  4. 27 0
      Documentation/devicetree/bindings/pci/fsl,pci.txt
  5. 1 0
      Documentation/ioctl/ioctl-number.txt
  6. 2 0
      Documentation/powerpc/00-INDEX
  7. 379 0
      Documentation/powerpc/cxl.txt
  8. 12 0
      MAINTAINERS
  9. 10 1
      arch/powerpc/Kconfig
  10. 1 0
      arch/powerpc/Makefile
  11. 5 0
      arch/powerpc/boot/Makefile
  12. 2 2
      arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
  13. 2 2
      arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
  14. 48 0
      arch/powerpc/boot/dts/t1040rdb.dts
  15. 48 0
      arch/powerpc/boot/dts/t1042rdb.dts
  16. 57 0
      arch/powerpc/boot/dts/t1042rdb_pi.dts
  17. 156 0
      arch/powerpc/boot/dts/t104xrdb.dtsi
  18. 1 0
      arch/powerpc/configs/cell_defconfig
  19. 1 0
      arch/powerpc/configs/celleb_defconfig
  20. 2 0
      arch/powerpc/configs/corenet32_smp_defconfig
  21. 10 36
      arch/powerpc/configs/corenet64_smp_defconfig
  22. 1 0
      arch/powerpc/configs/g5_defconfig
  23. 1 0
      arch/powerpc/configs/maple_defconfig
  24. 3 1
      arch/powerpc/configs/mpc85xx_defconfig
  25. 3 1
      arch/powerpc/configs/mpc85xx_smp_defconfig
  26. 3 0
      arch/powerpc/configs/mpc86xx_defconfig
  27. 1 0
      arch/powerpc/configs/pasemi_defconfig
  28. 1 0
      arch/powerpc/configs/ppc64_defconfig
  29. 0 1
      arch/powerpc/include/asm/bug.h
  30. 29 0
      arch/powerpc/include/asm/copro.h
  31. 1 0
      arch/powerpc/include/asm/dma-mapping.h
  32. 24 15
      arch/powerpc/include/asm/eeh.h
  33. 0 1
      arch/powerpc/include/asm/hydra.h
  34. 0 5
      arch/powerpc/include/asm/irq.h
  35. 0 1
      arch/powerpc/include/asm/kexec.h
  36. 0 2
      arch/powerpc/include/asm/machdep.h
  37. 10 0
      arch/powerpc/include/asm/mmu-hash64.h
  38. 42 3
      arch/powerpc/include/asm/opal.h
  39. 31 12
      arch/powerpc/include/asm/page_64.h
  40. 2 4
      arch/powerpc/include/asm/pgtable-ppc32.h
  41. 1 1
      arch/powerpc/include/asm/pgtable-ppc64-4k.h
  42. 3 3
      arch/powerpc/include/asm/pgtable-ppc64.h
  43. 3 0
      arch/powerpc/include/asm/pgtable.h
  44. 6 6
      arch/powerpc/include/asm/plpar_wrappers.h
  45. 31 0
      arch/powerpc/include/asm/pnv-pci.h
  46. 0 2
      arch/powerpc/include/asm/prom.h
  47. 2 1
      arch/powerpc/include/asm/reg.h
  48. 0 1
      arch/powerpc/include/asm/rio.h
  49. 2 3
      arch/powerpc/include/asm/spu.h
  50. 62 0
      arch/powerpc/include/asm/sstep.h
  51. 0 4
      arch/powerpc/include/asm/tsi108.h
  52. 0 1
      arch/powerpc/include/asm/udbg.h
  53. 98 14
      arch/powerpc/include/asm/word-at-a-time.h
  54. 1 0
      arch/powerpc/include/asm/xics.h
  55. 3 0
      arch/powerpc/kernel/Makefile
  56. 1 0
      arch/powerpc/kernel/crash_dump.c
  57. 6 2
      arch/powerpc/kernel/dma-swiotlb.c
  58. 43 4
      arch/powerpc/kernel/dma.c
  59. 210 59
      arch/powerpc/kernel/eeh.c
  60. 91 15
      arch/powerpc/kernel/eeh_driver.c
  61. 19 4
      arch/powerpc/kernel/eeh_pe.c
  62. 40 1
      arch/powerpc/kernel/eeh_sysfs.c
  63. 56 94
      arch/powerpc/kernel/head_8xx.S
  64. 1 1
      arch/powerpc/kernel/hw_breakpoint.c
  65. 1 1
      arch/powerpc/kernel/ibmebus.c
  66. 1 1
      arch/powerpc/kernel/idle_power7.S
  67. 3 3
      arch/powerpc/kernel/irq.c
  68. 1 1
      arch/powerpc/kernel/legacy_serial.c
  69. 14 17
      arch/powerpc/kernel/module_32.c
  70. 17 19
      arch/powerpc/kernel/module_64.c
  71. 1 1
      arch/powerpc/kernel/nvram_64.c
  72. 1 1
      arch/powerpc/kernel/of_platform.c
  73. 1 2
      arch/powerpc/kernel/pci-common.c
  74. 1 1
      arch/powerpc/kernel/pci_of_scan.c
  75. 13 179
      arch/powerpc/kernel/ppc_ksyms.c
  76. 61 0
      arch/powerpc/kernel/ppc_ksyms_32.c
  77. 2 0
      arch/powerpc/kernel/process.c
  78. 8 5
      arch/powerpc/kernel/prom.c
  79. 6 16
      arch/powerpc/kernel/prom_init_check.sh
  80. 1 1
      arch/powerpc/kernel/ptrace.c
  81. 1 1
      arch/powerpc/kernel/rtasd.c
  82. 4 3
      arch/powerpc/kernel/setup-common.c
  83. 1 1
      arch/powerpc/kernel/setup_32.c
  84. 21 11
      arch/powerpc/kernel/setup_64.c
  85. 9 2
      arch/powerpc/kernel/smp.c
  86. 3 2
      arch/powerpc/kernel/time.c
  87. 1 1
      arch/powerpc/lib/Makefile
  88. 1 1
      arch/powerpc/lib/feature-fixups.c
  89. 39 0
      arch/powerpc/lib/ppc_ksyms.c
  90. 636 360
      arch/powerpc/lib/sstep.c
  91. 1 0
      arch/powerpc/mm/Makefile
  92. 62 7
      arch/powerpc/mm/copro_fault.c
  93. 28 15
      arch/powerpc/mm/fault.c
  94. 5 1
      arch/powerpc/mm/hash_native_64.c
  95. 84 76
      arch/powerpc/mm/hash_utils_64.c
  96. 2 2
      arch/powerpc/mm/init_32.c
  97. 0 3
      arch/powerpc/mm/init_64.c
  98. 62 6
      arch/powerpc/mm/mem.c
  99. 10 17
      arch/powerpc/mm/numa.c
  100. 1 1
      arch/powerpc/mm/pgtable.c

+ 3 - 3
Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7

@@ -1,6 +1,6 @@
 What:		/sys/bus/event_source/devices/hv_24x7/interface/catalog
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		Provides access to the binary "24x7 catalog" provided by the
 		hypervisor on POWER7 and 8 systems. This catalog lists events
@@ -10,14 +10,14 @@ Description:
 
 What:		/sys/bus/event_source/devices/hv_24x7/interface/catalog_length
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		A number equal to the length in bytes of the catalog. This is
 		also extractable from the provided binary "catalog" sysfs entry.
 
 What:		/sys/bus/event_source/devices/hv_24x7/interface/catalog_version
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		Exposes the "version" field of the 24x7 catalog. This is also
 		extractable from the provided binary "catalog" sysfs entry.

+ 6 - 6
Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci

@@ -1,6 +1,6 @@
 What:		/sys/bus/event_source/devices/hv_gpci/interface/collect_privileged
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		'0' if the hypervisor is configured to forbid access to event
 		counters being accumulated by other guests and to physical
@@ -9,35 +9,35 @@ Description:
 
 What:		/sys/bus/event_source/devices/hv_gpci/interface/ga
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		0 or 1. Indicates whether we have access to "GA" events (listed
 		in arch/powerpc/perf/hv-gpci.h).
 
 What:		/sys/bus/event_source/devices/hv_gpci/interface/expanded
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		0 or 1. Indicates whether we have access to "EXPANDED" events (listed
 		in arch/powerpc/perf/hv-gpci.h).
 
 What:		/sys/bus/event_source/devices/hv_gpci/interface/lab
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		0 or 1. Indicates whether we have access to "LAB" events (listed
 		in arch/powerpc/perf/hv-gpci.h).
 
 What:		/sys/bus/event_source/devices/hv_gpci/interface/version
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		A number indicating the version of the gpci interface that the
 		hypervisor reports supporting.
 
 What:		/sys/bus/event_source/devices/hv_gpci/interface/kernel_version
 Date:		February 2014
-Contact:	Cody P Schafer <cody@linux.vnet.ibm.com>
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
 Description:
 		A number indicating the latest version of the gpci interface
 		that the kernel is aware of.

+ 129 - 0
Documentation/ABI/testing/sysfs-class-cxl

@@ -0,0 +1,129 @@
+Slave contexts (eg. /sys/class/cxl/afu0.0s):
+
+What:           /sys/class/cxl/<afu>/irqs_max
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                Decimal value of maximum number of interrupts that can be
+                requested by userspace.  The default on probe is the maximum
+                that hardware can support (eg. 2037). Write values will limit
+                userspace applications to that many userspace interrupts. Must
+                be >= irqs_min.
+
+What:           /sys/class/cxl/<afu>/irqs_min
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the minimum number of interrupts that
+                userspace must request on a CXL_START_WORK ioctl. Userspace may
+                omit the num_interrupts field in the START_WORK IOCTL to get
+                this minimum automatically.
+
+What:           /sys/class/cxl/<afu>/mmio_size
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the size of the MMIO space that may be mmaped
+                by userspace.
+
+What:           /sys/class/cxl/<afu>/modes_supported
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                List of the modes this AFU supports. One per line.
+                Valid entries are: "dedicated_process" and "afu_directed"
+
+What:           /sys/class/cxl/<afu>/mode
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                The current mode the AFU is using. Will be one of the modes
+                given in modes_supported. Writing will change the mode
+                provided that no user contexts are attached.
+
+
+What:           /sys/class/cxl/<afu>/prefault_mode
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read/write
+                Set the mode for prefaulting in segments into the segment table
+                when performing the START_WORK ioctl. Possible values:
+                        none: No prefaulting (default)
+                        work_element_descriptor: Treat the work element
+                                 descriptor as an effective address and
+                                 prefault what it points to.
+                        all: all segments process calling START_WORK maps.
+
+What:           /sys/class/cxl/<afu>/reset
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    write only
+                Writing 1 here will reset the AFU provided there are not
+                contexts active on the AFU.
+
+What:           /sys/class/cxl/<afu>/api_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the current version of the kernel/user API.
+
+What:           /sys/class/cxl/<afu>/api_version_com
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the the lowest version of the userspace API
+                this this kernel supports.
+
+
+
+Master contexts (eg. /sys/class/cxl/afu0.0m)
+
+What:           /sys/class/cxl/<afu>m/mmio_size
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the size of the MMIO space that may be mmaped
+                by userspace. This includes all slave contexts space also.
+
+What:           /sys/class/cxl/<afu>m/pp_mmio_len
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the Per Process MMIO space length.
+
+What:           /sys/class/cxl/<afu>m/pp_mmio_off
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Decimal value of the Per Process MMIO space offset.
+
+
+Card info (eg. /sys/class/cxl/card0)
+
+What:           /sys/class/cxl/<card>/caia_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the CAIA Version the card implements.
+
+What:           /sys/class/cxl/<card>/psl_version
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the revision level of the PSL.
+
+What:           /sys/class/cxl/<card>/base_image
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Identifies the revision level of the base image for devices
+                that support loadable PSLs. For FPGAs this field identifies
+                the image contained in the on-adapter flash which is loaded
+                during the initial program load.
+
+What:           /sys/class/cxl/<card>/image_loaded
+Date:           September 2014
+Contact:        linuxppc-dev@lists.ozlabs.org
+Description:    read only
+                Will return "user" or "factory" depending on the image loaded
+                onto the card.

+ 27 - 0
Documentation/devicetree/bindings/pci/fsl,pci.txt

@@ -0,0 +1,27 @@
+* Bus Enumeration by Freescale PCI-X Agent
+
+Typically any Freescale PCI-X bridge hardware strapped into Agent mode
+is prevented from enumerating the bus. The PrPMC form-factor requires
+all mezzanines to be PCI-X Agents, but one per system may still
+enumerate the bus.
+
+The property defined below will allow a PCI-X bridge to be used for bus
+enumeration despite being strapped into Agent mode.
+
+Required properties:
+- fsl,pci-agent-force-enum : There is no value associated with this
+  property. The property itself is treated as a boolean.
+
+Example:
+
+	/* PCI-X bridge known to be PrPMC Monarch */
+	pci0: pci@ef008000 {
+		fsl,pci-agent-force-enum;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+		device_type = "pci";
+		...
+		...
+	};

+ 1 - 0
Documentation/ioctl/ioctl-number.txt

@@ -313,6 +313,7 @@ Code  Seq#(hex)	Include File		Comments
 0xB1	00-1F	PPPoX			<mailto:mostrows@styx.uwaterloo.ca>
 0xB3	00	linux/mmc/ioctl.h
 0xC0	00-0F	linux/usb/iowarrior.h
+0xCA	00-0F	uapi/misc/cxl.h
 0xCB	00-1F	CBM serial IEC bus	in development:
 					<mailto:michael.klein@puffin.lb.shuttle.de>
 0xCD	01	linux/reiserfs_fs.h

+ 2 - 0
Documentation/powerpc/00-INDEX

@@ -11,6 +11,8 @@ bootwrapper.txt
 cpu_features.txt
 	- info on how we support a variety of CPUs with minimal compile-time
 	options.
+cxl.txt
+	- Overview of the CXL driver.
 eeh-pci-error-recovery.txt
 	- info on PCI Bus EEH Error Recovery
 firmware-assisted-dump.txt

+ 379 - 0
Documentation/powerpc/cxl.txt

@@ -0,0 +1,379 @@
+Coherent Accelerator Interface (CXL)
+====================================
+
+Introduction
+============
+
+    The coherent accelerator interface is designed to allow the
+    coherent connection of accelerators (FPGAs and other devices) to a
+    POWER system. These devices need to adhere to the Coherent
+    Accelerator Interface Architecture (CAIA).
+
+    IBM refers to this as the Coherent Accelerator Processor Interface
+    or CAPI. In the kernel it's referred to by the name CXL to avoid
+    confusion with the ISDN CAPI subsystem.
+
+    Coherent in this context means that the accelerator and CPUs can
+    both access system memory directly and with the same effective
+    addresses.
+
+
+Hardware overview
+=================
+
+          POWER8               FPGA
+       +----------+        +---------+
+       |          |        |         |
+       |   CPU    |        |   AFU   |
+       |          |        |         |
+       |          |        |         |
+       |          |        |         |
+       +----------+        +---------+
+       |   PHB    |        |         |
+       |   +------+        |   PSL   |
+       |   | CAPP |<------>|         |
+       +---+------+  PCIE  +---------+
+
+    The POWER8 chip has a Coherently Attached Processor Proxy (CAPP)
+    unit which is part of the PCIe Host Bridge (PHB). This is managed
+    by Linux by calls into OPAL. Linux doesn't directly program the
+    CAPP.
+
+    The FPGA (or coherently attached device) consists of two parts.
+    The POWER Service Layer (PSL) and the Accelerator Function Unit
+    (AFU). The AFU is used to implement specific functionality behind
+    the PSL. The PSL, among other things, provides memory address
+    translation services to allow each AFU direct access to userspace
+    memory.
+
+    The AFU is the core part of the accelerator (eg. the compression,
+    crypto etc function). The kernel has no knowledge of the function
+    of the AFU. Only userspace interacts directly with the AFU.
+
+    The PSL provides the translation and interrupt services that the
+    AFU needs. This is what the kernel interacts with. For example, if
+    the AFU needs to read a particular effective address, it sends
+    that address to the PSL, the PSL then translates it, fetches the
+    data from memory and returns it to the AFU. If the PSL has a
+    translation miss, it interrupts the kernel and the kernel services
+    the fault. The context to which this fault is serviced is based on
+    who owns that acceleration function.
+
+
+AFU Modes
+=========
+
+    There are two programming modes supported by the AFU. Dedicated
+    and AFU directed. AFU may support one or both modes.
+
+    When using dedicated mode only one MMU context is supported. In
+    this mode, only one userspace process can use the accelerator at
+    time.
+
+    When using AFU directed mode, up to 16K simultaneous contexts can
+    be supported. This means up to 16K simultaneous userspace
+    applications may use the accelerator (although specific AFUs may
+    support fewer). In this mode, the AFU sends a 16 bit context ID
+    with each of its requests. This tells the PSL which context is
+    associated with each operation. If the PSL can't translate an
+    operation, the ID can also be accessed by the kernel so it can
+    determine the userspace context associated with an operation.
+
+
+MMIO space
+==========
+
+    A portion of the accelerator MMIO space can be directly mapped
+    from the AFU to userspace. Either the whole space can be mapped or
+    just a per context portion. The hardware is self describing, hence
+    the kernel can determine the offset and size of the per context
+    portion.
+
+
+Interrupts
+==========
+
+    AFUs may generate interrupts that are destined for userspace. These
+    are received by the kernel as hardware interrupts and passed onto
+    userspace by a read syscall documented below.
+
+    Data storage faults and error interrupts are handled by the kernel
+    driver.
+
+
+Work Element Descriptor (WED)
+=============================
+
+    The WED is a 64-bit parameter passed to the AFU when a context is
+    started. Its format is up to the AFU hence the kernel has no
+    knowledge of what it represents. Typically it will be the
+    effective address of a work queue or status block where the AFU
+    and userspace can share control and status information.
+
+
+
+
+User API
+========
+
+    For AFUs operating in AFU directed mode, two character device
+    files will be created. /dev/cxl/afu0.0m will correspond to a
+    master context and /dev/cxl/afu0.0s will correspond to a slave
+    context. Master contexts have access to the full MMIO space an
+    AFU provides. Slave contexts have access to only the per process
+    MMIO space an AFU provides.
+
+    For AFUs operating in dedicated process mode, the driver will
+    only create a single character device per AFU called
+    /dev/cxl/afu0.0d. This will have access to the entire MMIO space
+    that the AFU provides (like master contexts in AFU directed).
+
+    The types described below are defined in include/uapi/misc/cxl.h
+
+    The following file operations are supported on both slave and
+    master devices.
+
+
+open
+----
+
+    Opens the device and allocates a file descriptor to be used with
+    the rest of the API.
+
+    A dedicated mode AFU only has one context and only allows the
+    device to be opened once.
+
+    An AFU directed mode AFU can have many contexts, the device can be
+    opened once for each context that is available.
+
+    When all available contexts are allocated the open call will fail
+    and return -ENOSPC.
+
+    Note: IRQs need to be allocated for each context, which may limit
+          the number of contexts that can be created, and therefore
+          how many times the device can be opened. The POWER8 CAPP
+          supports 2040 IRQs and 3 are used by the kernel, so 2037 are
+          left. If 1 IRQ is needed per context, then only 2037
+          contexts can be allocated. If 4 IRQs are needed per context,
+          then only 2037/4 = 509 contexts can be allocated.
+
+
+ioctl
+-----
+
+    CXL_IOCTL_START_WORK:
+        Starts the AFU context and associates it with the current
+        process. Once this ioctl is successfully executed, all memory
+        mapped into this process is accessible to this AFU context
+        using the same effective addresses. No additional calls are
+        required to map/unmap memory. The AFU memory context will be
+        updated as userspace allocates and frees memory. This ioctl
+        returns once the AFU context is started.
+
+        Takes a pointer to a struct cxl_ioctl_start_work:
+
+                struct cxl_ioctl_start_work {
+                        __u64 flags;
+                        __u64 work_element_descriptor;
+                        __u64 amr;
+                        __s16 num_interrupts;
+                        __s16 reserved1;
+                        __s32 reserved2;
+                        __u64 reserved3;
+                        __u64 reserved4;
+                        __u64 reserved5;
+                        __u64 reserved6;
+                };
+
+            flags:
+                Indicates which optional fields in the structure are
+                valid.
+
+            work_element_descriptor:
+                The Work Element Descriptor (WED) is a 64-bit argument
+                defined by the AFU. Typically this is an effective
+                address pointing to an AFU specific structure
+                describing what work to perform.
+
+            amr:
+                Authority Mask Register (AMR), same as the powerpc
+                AMR. This field is only used by the kernel when the
+                corresponding CXL_START_WORK_AMR value is specified in
+                flags. If not specified the kernel will use a default
+                value of 0.
+
+            num_interrupts:
+                Number of userspace interrupts to request. This field
+                is only used by the kernel when the corresponding
+                CXL_START_WORK_NUM_IRQS value is specified in flags.
+                If not specified the minimum number required by the
+                AFU will be allocated. The min and max number can be
+                obtained from sysfs.
+
+            reserved fields:
+                For ABI padding and future extensions
+
+    CXL_IOCTL_GET_PROCESS_ELEMENT:
+        Get the current context id, also known as the process element.
+        The value is returned from the kernel as a __u32.
+
+
+mmap
+----
+
+    An AFU may have an MMIO space to facilitate communication with the
+    AFU. If it does, the MMIO space can be accessed via mmap. The size
+    and contents of this area are specific to the particular AFU. The
+    size can be discovered via sysfs.
+
+    In AFU directed mode, master contexts are allowed to map all of
+    the MMIO space and slave contexts are allowed to only map the per
+    process MMIO space associated with the context. In dedicated
+    process mode the entire MMIO space can always be mapped.
+
+    This mmap call must be done after the START_WORK ioctl.
+
+    Care should be taken when accessing MMIO space. Only 32 and 64-bit
+    accesses are supported by POWER8. Also, the AFU will be designed
+    with a specific endianness, so all MMIO accesses should consider
+    endianness (recommend endian(3) variants like: le64toh(),
+    be64toh() etc). These endian issues equally apply to shared memory
+    queues the WED may describe.
+
+
+read
+----
+
+    Reads events from the AFU. Blocks if no events are pending
+    (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
+    unrecoverable error or if the card is removed.
+
+    read() will always return an integral number of events.
+
+    The buffer passed to read() must be at least 4K bytes.
+
+    The result of the read will be a buffer of one or more events,
+    each event is of type struct cxl_event, of varying size.
+
+            struct cxl_event {
+                    struct cxl_event_header header;
+                    union {
+                            struct cxl_event_afu_interrupt irq;
+                            struct cxl_event_data_storage fault;
+                            struct cxl_event_afu_error afu_error;
+                    };
+            };
+
+    The struct cxl_event_header is defined as:
+
+            struct cxl_event_header {
+                    __u16 type;
+                    __u16 size;
+                    __u16 process_element;
+                    __u16 reserved1;
+            };
+
+        type:
+            This defines the type of event. The type determines how
+            the rest of the event is structured. These types are
+            described below and defined by enum cxl_event_type.
+
+        size:
+            This is the size of the event in bytes including the
+            struct cxl_event_header. The start of the next event can
+            be found at this offset from the start of the current
+            event.
+
+        process_element:
+            Context ID of the event.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_AFU_INTERRUPT then the event
+    structure is defined as:
+
+            struct cxl_event_afu_interrupt {
+                    __u16 flags;
+                    __u16 irq; /* Raised AFU interrupt number */
+                    __u32 reserved1;
+            };
+
+        flags:
+            These flags indicate which optional fields are present
+            in this struct. Currently all fields are mandatory.
+
+        irq:
+            The IRQ number sent by the AFU.
+
+        reserved field:
+            For future extensions and padding.
+
+    If the event type is CXL_EVENT_DATA_STORAGE then the event
+    structure is defined as:
+
+            struct cxl_event_data_storage {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 addr;
+                    __u64 dsisr;
+                    __u64 reserved3;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are mandatory.
+
+        address:
+            The address that the AFU unsuccessfully attempted to
+            access. Valid accesses will be handled transparently by the
+            kernel but invalid accesses will generate this event.
+
+        dsisr:
+            This field gives information on the type of fault. It is a
+            copy of the DSISR from the PSL hardware when the address
+            fault occurred. The form of the DSISR is as defined in the
+            CAIA.
+
+        reserved fields:
+            For future extensions
+
+    If the event type is CXL_EVENT_AFU_ERROR then the event structure
+    is defined as:
+
+            struct cxl_event_afu_error {
+                    __u16 flags;
+                    __u16 reserved1;
+                    __u32 reserved2;
+                    __u64 error;
+            };
+
+        flags:
+            These flags indicate which optional fields are present in
+            this struct. Currently all fields are Mandatory.
+
+        error:
+            Error status from the AFU. Defined by the AFU.
+
+        reserved fields:
+            For future extensions and padding
+
+Sysfs Class
+===========
+
+    A cxl sysfs class is added under /sys/class/cxl to facilitate
+    enumeration and tuning of the accelerators. Its layout is
+    described in Documentation/ABI/testing/sysfs-class-cxl
+
+Udev rules
+==========
+
+    The following udev rules could be used to create a symlink to the
+    most logical chardev to use in any programming mode (afuX.Yd for
+    dedicated, afuX.Ys for afu directed), since the API is virtually
+    identical for each:
+
+	SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
+	SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
+	                  KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"

+ 12 - 0
MAINTAINERS

@@ -2761,6 +2761,18 @@ W:	http://www.chelsio.com
 S:	Supported
 F:	drivers/net/ethernet/chelsio/cxgb4vf/
 
+CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
+M:	Ian Munsie <imunsie@au1.ibm.com>
+M:	Michael Neuling <mikey@neuling.org>
+L:	linuxppc-dev@lists.ozlabs.org
+S:	Supported
+F:	drivers/misc/cxl/
+F:	include/misc/cxl.h
+F:	include/uapi/misc/cxl.h
+F:	Documentation/powerpc/cxl.txt
+F:	Documentation/powerpc/cxl.txt
+F:	Documentation/ABI/testing/sysfs-class-cxl
+
 STMMAC ETHERNET DRIVER
 M:	Giuseppe Cavallaro <peppe.cavallaro@st.com>
 L:	netdev@vger.kernel.org

+ 10 - 1
arch/powerpc/Kconfig

@@ -147,6 +147,7 @@ config PPC
 	select ARCH_USE_CMPXCHG_LOCKREF if PPC64
 	select HAVE_ARCH_AUDITSYSCALL
 	select ARCH_SUPPORTS_ATOMIC_RMW
+	select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
 
 config GENERIC_CSUM
 	def_bool CPU_LITTLE_ENDIAN
@@ -182,7 +183,7 @@ config SCHED_OMIT_FRAME_POINTER
 
 config ARCH_MAY_HAVE_PC_FDC
 	bool
-	default !PPC_PSERIES || PCI
+	default PCI
 
 config PPC_OF
 	def_bool y
@@ -287,6 +288,10 @@ config PPC_EMULATE_SSTEP
 	bool
 	default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
 
+config ZONE_DMA32
+	bool
+	default y if PPC64
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -603,6 +608,10 @@ config PPC_SUBPAGE_PROT
 	  to set access permissions (read/write, readonly, or no access)
 	  on the 4k subpages of each 64k page.
 
+config PPC_COPRO_BASE
+	bool
+	default n
+
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on PPC64 && SMP

+ 1 - 0
arch/powerpc/Makefile

@@ -135,6 +135,7 @@ CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
 CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
 CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
 CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
+CFLAGS-$(CONFIG_POWER8_CPU) += $(call cc-option,-mcpu=power8)
 
 # Altivec option not allowed with e500mc64 in GCC.
 ifeq ($(CONFIG_ALTIVEC),y)

+ 5 - 0
arch/powerpc/boot/Makefile

@@ -389,7 +389,12 @@ $(obj)/zImage:		$(addprefix $(obj)/, $(image-y))
 $(obj)/zImage.initrd:	$(addprefix $(obj)/, $(initrd-y))
 	@rm -f $@; ln $< $@
 
+# Only install the vmlinux
 install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
+	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)"
+
+# Install the vmlinux and other built boot targets.
+zInstall: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
 	sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
 
 # anything not in $(targets)

+ 2 - 2
arch/powerpc/boot/dts/fsl/t2081si-post.dtsi

@@ -410,7 +410,7 @@
 /include/ "qoriq-gpio-3.dtsi"
 /include/ "qoriq-usb2-mph-0.dtsi"
 	usb0: usb@210000 {
-		compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+		compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
 		fsl,iommu-parent = <&pamu1>;
 		fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
 		phy_type = "utmi";
@@ -418,7 +418,7 @@
 	};
 /include/ "qoriq-usb2-dr-0.dtsi"
 	usb1: usb@211000 {
-		compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+		compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
 		fsl,iommu-parent = <&pamu1>;
 		fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */
 		dr_mode = "host";

+ 2 - 2
arch/powerpc/boot/dts/fsl/t4240si-post.dtsi

@@ -498,13 +498,13 @@
 /include/ "qoriq-gpio-3.dtsi"
 /include/ "qoriq-usb2-mph-0.dtsi"
 		usb0: usb@210000 {
-			compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+			compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
 			phy_type = "utmi";
 			port0;
 		};
 /include/ "qoriq-usb2-dr-0.dtsi"
 		usb1: usb@211000 {
-			compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+			compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
 			dr_mode = "host";
 			phy_type = "utmi";
 		};

+ 48 - 0
arch/powerpc/boot/dts/t1040rdb.dts

@@ -0,0 +1,48 @@
+/*
+ * T1040RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+	model = "fsl,T1040RDB";
+	compatible = "fsl,T1040RDB";
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1040rdb-cpld";
+		};
+	};
+};
+
+/include/ "fsl/t1040si-post.dtsi"

+ 48 - 0
arch/powerpc/boot/dts/t1042rdb.dts

@@ -0,0 +1,48 @@
+/*
+ * T1042RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+	model = "fsl,T1042RDB";
+	compatible = "fsl,T1042RDB";
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1042rdb-cpld";
+		};
+	};
+};
+
+/include/ "fsl/t1042si-post.dtsi"

+ 57 - 0
arch/powerpc/boot/dts/t1042rdb_pi.dts

@@ -0,0 +1,57 @@
+/*
+ * T1042RDB_PI Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+	model = "fsl,T1042RDB_PI";
+	compatible = "fsl,T1042RDB_PI";
+	ifc: localbus@ffe124000 {
+		cpld@3,0 {
+			compatible = "fsl,t1042rdb_pi-cpld";
+		};
+	};
+	soc: soc@ffe000000 {
+		i2c@118000 {
+			rtc@68 {
+				compatible = "dallas,ds1337";
+				reg = <0x68>;
+				interrupts = <0x2 0x1 0 0>;
+			};
+		};
+	};
+};
+
+/include/ "fsl/t1042si-post.dtsi"

+ 156 - 0
arch/powerpc/boot/dts/t104xrdb.dtsi

@@ -0,0 +1,156 @@
+/*
+ * T1040RDB/T1042RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+
+	ifc: localbus@ffe124000 {
+		reg = <0xf 0xfe124000 0 0x2000>;
+		ranges = <0 0 0xf 0xe8000000 0x08000000
+			  2 0 0xf 0xff800000 0x00010000
+			  3 0 0xf 0xffdf0000 0x00008000>;
+
+		nor@0,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "cfi-flash";
+			reg = <0x0 0x0 0x8000000>;
+			bank-width = <2>;
+			device-width = <1>;
+		};
+
+		nand@2,0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			compatible = "fsl,ifc-nand";
+			reg = <0x2 0x0 0x10000>;
+		};
+
+		cpld@3,0 {
+			reg = <3 0 0x300>;
+		};
+	};
+
+	memory {
+		device_type = "memory";
+	};
+
+	dcsr: dcsr@f00000000 {
+		ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+	};
+
+	soc: soc@ffe000000 {
+		ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+		reg = <0xf 0xfe000000 0 0x00001000>;
+
+		spi@110000 {
+			flash@0 {
+				#address-cells = <1>;
+				#size-cells = <1>;
+				compatible = "micron,n25q512a";
+				reg = <0>;
+				spi-max-frequency = <10000000>; /* input clock */
+			};
+		};
+
+		i2c@118100 {
+			pca9546@77 {
+				compatible = "nxp,pca9546";
+				reg = <0x77>;
+				#address-cells = <1>;
+				#size-cells = <0>;
+			};
+		};
+
+	};
+
+	pci0: pcie@ffe240000 {
+		reg = <0xf 0xfe240000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci1: pcie@ffe250000 {
+		reg = <0xf 0xfe250000 0 0x10000>;
+		ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000
+			  0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci2: pcie@ffe260000 {
+		reg = <0xf 0xfe260000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+
+	pci3: pcie@ffe270000 {
+		reg = <0xf 0xfe270000 0 0x10000>;
+		ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+			  0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+		pcie@0 {
+			ranges = <0x02000000 0 0xe0000000
+				  0x02000000 0 0xe0000000
+				  0 0x10000000
+
+				  0x01000000 0 0x00000000
+				  0x01000000 0 0x00000000
+				  0 0x00010000>;
+		};
+	};
+};

+ 1 - 0
arch/powerpc/configs/cell_defconfig

@@ -18,6 +18,7 @@ CONFIG_OPROFILE=m
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_POWERNV is not set
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_PS3=y

+ 1 - 0
arch/powerpc/configs/celleb_defconfig

@@ -15,6 +15,7 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_PPC_POWERNV is not set
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_CELLEB=y

+ 2 - 0
arch/powerpc/configs/corenet32_smp_defconfig

@@ -165,6 +165,8 @@ CONFIG_NFS_FS=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=m
 CONFIG_MAGIC_SYSRQ=y

+ 10 - 36
arch/powerpc/configs/corenet64_smp_defconfig

@@ -50,7 +50,6 @@ CONFIG_NET_IPIP=y
 CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
 CONFIG_INET_ESP=y
 # CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_LRO is not set
@@ -60,33 +59,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_FTL=y
 CONFIG_MTD_CFI=y
-CONFIG_MTD_GEN_PROBE=y
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
 CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_CFI_UTIL=y
-CONFIG_MTD_NAND_ECC=y
 CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_IDS=y
 CONFIG_MTD_NAND_FSL_ELBC=y
 CONFIG_MTD_NAND_FSL_IFC=y
 CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_WL_THRESHOLD=4096
-CONFIG_MTD_UBI_BEB_RESERVE=1
-CONFIG_PROC_DEVICETREE=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=131072
@@ -102,6 +85,7 @@ CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 CONFIG_SERIO_LIBPS2=y
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_MANY_PORTS=y
@@ -115,7 +99,6 @@ CONFIG_SPI_GPIO=y
 CONFIG_SPI_FSL_SPI=y
 CONFIG_SPI_FSL_ESPI=y
 # CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
 CONFIG_USB_HID=m
 CONFIG_USB=y
 CONFIG_USB_MON=y
@@ -124,14 +107,17 @@ CONFIG_USB_EHCI_FSL=y
 CONFIG_USB_STORAGE=y
 CONFIG_MMC=y
 CONFIG_MMC_SDHCI=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MM_EDAC=y
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_DS1307=y
 CONFIG_RTC_DRV_DS1374=y
 CONFIG_RTC_DRV_DS3232=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
 CONFIG_DMADEVICES=y
 CONFIG_FSL_DMA=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_FSL_HV_MANAGER=y
+CONFIG_FSL_CORENET_CF=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_ISO9660_FS=m
@@ -144,35 +130,24 @@ CONFIG_NTFS_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
-CONFIG_MISC_FILESYSTEMS=y
 CONFIG_JFFS2_FS=y
 CONFIG_JFFS2_FS_DEBUG=1
-CONFIG_JFFS2_FS_WRITEBUFFER=y
-CONFIG_JFFS2_ZLIB=y
-CONFIG_JFFS2_RTIME=y
 CONFIG_UBIFS_FS=y
-CONFIG_UBIFS_FS_XATTR=y
-CONFIG_UBIFS_FS_LZO=y
-CONFIG_UBIFS_FS_ZLIB=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
 CONFIG_NFSD=m
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_NLS_UTF8=m
 CONFIG_CRC_T10DIF=y
-CONFIG_CRC16=y
-CONFIG_ZLIB_DEFLATE=y
-CONFIG_LZO_COMPRESS=y
-CONFIG_LZO_DECOMPRESS=y
-CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRYPTO_LZO=y
+CONFIG_DEBUG_INFO=y
 CONFIG_FRAME_WARN=1024
-CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
 CONFIG_CRYPTO_NULL=y
 CONFIG_CRYPTO_PCBC=m
 CONFIG_CRYPTO_MD4=y
@@ -180,4 +155,3 @@ CONFIG_CRYPTO_SHA256=y
 CONFIG_CRYPTO_SHA512=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_FSL_CAAM=y
-CONFIG_FSL_CORENET_CF=y

+ 1 - 0
arch/powerpc/configs/g5_defconfig

@@ -16,6 +16,7 @@ CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
+# CONFIG_PPC_POWERNV is not set
 # CONFIG_PPC_PSERIES is not set
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y

+ 1 - 0
arch/powerpc/configs/maple_defconfig

@@ -16,6 +16,7 @@ CONFIG_MODULE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 # CONFIG_BLK_DEV_BSG is not set
+# CONFIG_PPC_POWERNV is not set
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_MAPLE=y

+ 3 - 1
arch/powerpc/configs/mpc85xx_defconfig

@@ -213,7 +213,6 @@ CONFIG_RTC_DRV_DS1307=y
 CONFIG_RTC_DRV_DS1374=y
 CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_CMOS=y
-CONFIG_RTC_DRV_DS1307=y
 CONFIG_DMADEVICES=y
 CONFIG_FSL_DMA=y
 # CONFIG_NET_DMA is not set
@@ -227,6 +226,9 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y

+ 3 - 1
arch/powerpc/configs/mpc85xx_smp_defconfig

@@ -214,7 +214,6 @@ CONFIG_RTC_DRV_DS1307=y
 CONFIG_RTC_DRV_DS1374=y
 CONFIG_RTC_DRV_DS3232=y
 CONFIG_RTC_DRV_CMOS=y
-CONFIG_RTC_DRV_DS1307=y
 CONFIG_DMADEVICES=y
 CONFIG_FSL_DMA=y
 # CONFIG_NET_DMA is not set
@@ -228,6 +227,9 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y

+ 3 - 0
arch/powerpc/configs/mpc86xx_defconfig

@@ -145,6 +145,9 @@ CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_ADFS_FS=m

+ 1 - 0
arch/powerpc/configs/pasemi_defconfig

@@ -14,6 +14,7 @@ CONFIG_MODULE_UNLOAD=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_POWERNV is not set
 # CONFIG_PPC_PSERIES is not set
 # CONFIG_PPC_PMAC is not set
 CONFIG_PPC_PASEMI=y

+ 1 - 0
arch/powerpc/configs/ppc64_defconfig

@@ -50,6 +50,7 @@ CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
 CONFIG_PPC_TRANSACTIONAL_MEM=y
 CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTREMOVE=y
 CONFIG_SCHED_SMT=y

+ 0 - 1
arch/powerpc/include/asm/bug.h

@@ -133,7 +133,6 @@ extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
 extern void bad_page_fault(struct pt_regs *, unsigned long, int);
 extern void _exception(int, struct pt_regs *, int, unsigned long);
 extern void die(const char *, struct pt_regs *, long);
-extern void print_backtrace(unsigned long *);
 
 #endif /* !__ASSEMBLY__ */
 

+ 29 - 0
arch/powerpc/include/asm/copro.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_POWERPC_COPRO_H
+#define _ASM_POWERPC_COPRO_H
+
+struct copro_slb
+{
+	u64 esid, vsid;
+};
+
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+			  unsigned long dsisr, unsigned *flt);
+
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb);
+
+
+#ifdef CONFIG_PPC_COPRO_BASE
+void copro_flush_all_slbs(struct mm_struct *mm);
+#else
+static inline void copro_flush_all_slbs(struct mm_struct *mm) {}
+#endif
+#endif /* _ASM_POWERPC_COPRO_H */

+ 1 - 0
arch/powerpc/include/asm/dma-mapping.h

@@ -135,6 +135,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 
 extern int dma_set_mask(struct device *dev, u64 dma_mask);
 extern int __dma_set_mask(struct device *dev, u64 dma_mask);
+extern u64 __dma_get_required_mask(struct device *dev);
 
 #define dma_alloc_coherent(d,s,h,f)	dma_alloc_attrs(d,s,h,f,NULL)
 

+ 24 - 15
arch/powerpc/include/asm/eeh.h

@@ -146,6 +146,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
 	return edev ? edev->pdev : NULL;
 }
 
+static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev)
+{
+	return edev ? edev->pe : NULL;
+}
+
 /* Return values from eeh_ops::next_error */
 enum {
 	EEH_NEXT_ERR_NONE = 0,
@@ -167,6 +172,7 @@ enum {
 #define EEH_OPT_ENABLE		1	/* EEH enable	*/
 #define EEH_OPT_THAW_MMIO	2	/* MMIO enable	*/
 #define EEH_OPT_THAW_DMA	3	/* DMA enable	*/
+#define EEH_OPT_FREEZE_PE	4	/* Freeze PE	*/
 #define EEH_STATE_UNAVAILABLE	(1 << 0)	/* State unavailable	*/
 #define EEH_STATE_NOT_SUPPORT	(1 << 1)	/* EEH not supported	*/
 #define EEH_STATE_RESET_ACTIVE	(1 << 2)	/* Active reset		*/
@@ -198,6 +204,8 @@ struct eeh_ops {
 	int (*wait_state)(struct eeh_pe *pe, int max_wait);
 	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
 	int (*configure_bridge)(struct eeh_pe *pe);
+	int (*err_inject)(struct eeh_pe *pe, int type, int func,
+			  unsigned long addr, unsigned long mask);
 	int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
 	int (*write_config)(struct device_node *dn, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
@@ -269,8 +277,7 @@ void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 int eeh_init(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
-unsigned long eeh_check_failure(const volatile void __iomem *token,
-				unsigned long val);
+int eeh_check_failure(const volatile void __iomem *token);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
 void eeh_add_device_early(struct device_node *);
@@ -279,6 +286,8 @@ void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
 void eeh_remove_device(struct pci_dev *);
+int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state);
+int eeh_pe_reset_and_recover(struct eeh_pe *pe);
 int eeh_dev_open(struct pci_dev *pdev);
 void eeh_dev_release(struct pci_dev *pdev);
 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
@@ -321,9 +330,9 @@ static inline void *eeh_dev_init(struct device_node *dn, void *data)
 
 static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
 
-static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+static inline int eeh_check_failure(const volatile void __iomem *token)
 {
-	return val;
+	return 0;
 }
 
 #define eeh_dev_check_failure(x) (0)
@@ -354,7 +363,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr)
 {
 	u8 val = in_8(addr);
 	if (EEH_POSSIBLE_ERROR(val, u8))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -362,7 +371,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr)
 {
 	u16 val = in_le16(addr);
 	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -370,7 +379,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr)
 {
 	u32 val = in_le32(addr);
 	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -378,7 +387,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr)
 {
 	u64 val = in_le64(addr);
 	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -386,7 +395,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr)
 {
 	u16 val = in_be16(addr);
 	if (EEH_POSSIBLE_ERROR(val, u16))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -394,7 +403,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr)
 {
 	u32 val = in_be32(addr);
 	if (EEH_POSSIBLE_ERROR(val, u32))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -402,7 +411,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr)
 {
 	u64 val = in_be64(addr);
 	if (EEH_POSSIBLE_ERROR(val, u64))
-		return eeh_check_failure(addr, val);
+		eeh_check_failure(addr);
 	return val;
 }
 
@@ -416,7 +425,7 @@ static inline void eeh_memcpy_fromio(void *dest, const
 	 * were copied. Check all four bytes.
 	 */
 	if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32))
-		eeh_check_failure(src, *((u32 *)(dest + n - 4)));
+		eeh_check_failure(src);
 }
 
 /* in-string eeh macros */
@@ -425,7 +434,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf,
 {
 	_insb(addr, buf, ns);
 	if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
-		eeh_check_failure(addr, *(u8*)buf);
+		eeh_check_failure(addr);
 }
 
 static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
@@ -433,7 +442,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
 {
 	_insw(addr, buf, ns);
 	if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
-		eeh_check_failure(addr, *(u16*)buf);
+		eeh_check_failure(addr);
 }
 
 static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
@@ -441,7 +450,7 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
 {
 	_insl(addr, buf, nl);
 	if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
-		eeh_check_failure(addr, *(u32*)buf);
+		eeh_check_failure(addr);
 }
 
 #endif /* CONFIG_PPC64 */

+ 0 - 1
arch/powerpc/include/asm/hydra.h

@@ -95,7 +95,6 @@ extern volatile struct Hydra __iomem *Hydra;
 #define HYDRA_INT_SPARE		19
 
 extern int hydra_init(void);
-extern void macio_adb_init(void);
 
 #endif /* __KERNEL__ */
 

+ 0 - 5
arch/powerpc/include/asm/irq.h

@@ -31,11 +31,6 @@ extern atomic_t ppc_n_lost_interrupts;
 
 extern irq_hw_number_t virq_to_hw(unsigned int virq);
 
-/**
- * irq_early_init - Init irq remapping subsystem
- */
-extern void irq_early_init(void);
-
 static __inline__ int irq_canonicalize(int irq)
 {
 	return irq;

+ 0 - 1
arch/powerpc/include/asm/kexec.h

@@ -81,7 +81,6 @@ extern void default_machine_crash_shutdown(struct pt_regs *regs);
 extern int crash_shutdown_register(crash_shutdown_t handler);
 extern int crash_shutdown_unregister(crash_shutdown_t handler);
 
-extern void machine_kexec_simple(struct kimage *image);
 extern void crash_kexec_secondary(struct pt_regs *regs);
 extern int overlaps_crashkernel(unsigned long start, unsigned long size);
 extern void reserve_crashkernel(void);

+ 0 - 2
arch/powerpc/include/asm/machdep.h

@@ -328,8 +328,6 @@ extern struct machdep_calls *machine_id;
 
 extern void probe_machine(void);
 
-extern char cmd_line[COMMAND_LINE_SIZE];
-
 #ifdef CONFIG_PPC_PMAC
 /*
  * Power macintoshes have either a CUDA, PMU or SMU controlling

+ 10 - 0
arch/powerpc/include/asm/mmu-hash64.h

@@ -190,6 +190,13 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
 
 #ifndef __ASSEMBLY__
 
+static inline int slb_vsid_shift(int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return SLB_VSID_SHIFT;
+	return SLB_VSID_SHIFT_1T;
+}
+
 static inline int segment_shift(int ssize)
 {
 	if (ssize == MMU_SEGSIZE_256M)
@@ -317,6 +324,7 @@ extern int __hash_page_64K(unsigned long ea, unsigned long access,
 			   unsigned int local, int ssize);
 struct mm_struct;
 unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
+extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		     pte_t *ptep, unsigned long trap, int local, int ssize,
@@ -342,6 +350,8 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access,
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long prot,
 			     int psize, int ssize);
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+			int psize, int ssize);
 extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
 

+ 42 - 3
arch/powerpc/include/asm/opal.h

@@ -135,6 +135,7 @@ struct opal_sg_list {
 #define OPAL_FLASH_MANAGE			77
 #define OPAL_FLASH_UPDATE			78
 #define OPAL_RESYNC_TIMEBASE			79
+#define OPAL_CHECK_TOKEN			80
 #define OPAL_DUMP_INIT				81
 #define OPAL_DUMP_INFO				82
 #define OPAL_DUMP_READ				83
@@ -146,7 +147,9 @@ struct opal_sg_list {
 #define OPAL_GET_PARAM				89
 #define OPAL_SET_PARAM				90
 #define OPAL_DUMP_RESEND			91
+#define OPAL_PCI_SET_PHB_CXL_MODE		93
 #define OPAL_DUMP_INFO2				94
+#define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
 #define OPAL_REGISTER_DUMP_REGION		101
@@ -199,6 +202,35 @@ enum OpalPciErrorSeverity {
 	OPAL_EEH_SEV_INF	= 5
 };
 
+enum OpalErrinjectType {
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR	= 0,
+	OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64	= 1,
+};
+
+enum OpalErrinjectFunc {
+	/* IOA bus specific errors */
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR	= 0,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA	= 1,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR	= 2,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA	= 3,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR	= 4,
+	OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA	= 5,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR	= 6,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA	= 7,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR	= 8,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA	= 9,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR	= 10,
+	OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA	= 11,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR	= 12,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA	= 13,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER	= 14,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET	= 15,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR	= 16,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA	= 17,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER	= 18,
+	OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET	= 19,
+};
+
 enum OpalShpcAction {
 	OPAL_SHPC_GET_LINK_STATE = 0,
 	OPAL_SHPC_GET_SLOT_STATE = 1
@@ -356,9 +388,12 @@ enum OpalM64EnableAction {
 };
 
 enum OpalPciResetScope {
-	OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
-	OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
-	OPAL_PCI_IODA_TABLE_RESET = 6,
+	OPAL_RESET_PHB_COMPLETE		= 1,
+	OPAL_RESET_PCI_LINK		= 2,
+	OPAL_RESET_PHB_ERROR		= 3,
+	OPAL_RESET_PCI_HOT		= 4,
+	OPAL_RESET_PCI_FUNDAMENTAL	= 5,
+	OPAL_RESET_PCI_IODA_TABLE	= 6
 };
 
 enum OpalPciReinitScope {
@@ -819,6 +854,8 @@ int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
 				  uint64_t eeh_action_token);
 int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
 				uint64_t eeh_action_token);
+int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type,
+			    uint32_t func, uint64_t addr, uint64_t mask);
 int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
 
 
@@ -887,6 +924,7 @@ int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
 			    __be16 *pci_error_type, __be16 *severity);
 int64_t opal_pci_poll(uint64_t phb_id);
 int64_t opal_return_cpu(void);
+int64_t opal_check_token(uint64_t token);
 int64_t opal_reinit_cpus(uint64_t flags);
 
 int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val);
@@ -924,6 +962,7 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,

+ 31 - 12
arch/powerpc/include/asm/page_64.h

@@ -42,20 +42,40 @@
 
 typedef unsigned long pte_basic_t;
 
-static __inline__ void clear_page(void *addr)
+static inline void clear_page(void *addr)
 {
-	unsigned long lines, line_size;
-
-	line_size = ppc64_caches.dline_size;
-	lines = ppc64_caches.dlines_per_page;
-
-	__asm__ __volatile__(
+	unsigned long iterations;
+	unsigned long onex, twox, fourx, eightx;
+
+	iterations = ppc64_caches.dlines_per_page / 8;
+
+	/*
+	 * Some verisions of gcc use multiply instructions to
+	 * calculate the offsets so lets give it a hand to
+	 * do better.
+	 */
+	onex = ppc64_caches.dline_size;
+	twox = onex << 1;
+	fourx = onex << 2;
+	eightx = onex << 3;
+
+	asm volatile(
 	"mtctr	%1	# clear_page\n\
-1:      dcbz	0,%0\n\
-	add	%0,%0,%3\n\
+	.balign	16\n\
+1:	dcbz	0,%0\n\
+	dcbz	%3,%0\n\
+	dcbz	%4,%0\n\
+	dcbz	%5,%0\n\
+	dcbz	%6,%0\n\
+	dcbz	%7,%0\n\
+	dcbz	%8,%0\n\
+	dcbz	%9,%0\n\
+	add	%0,%0,%10\n\
 	bdnz+	1b"
-        : "=r" (addr)
-        : "r" (lines), "0" (addr), "r" (line_size)
+	: "=&r" (addr)
+	: "r" (iterations), "0" (addr), "b" (onex), "b" (twox),
+		"b" (twox+onex), "b" (fourx), "b" (fourx+onex),
+		"b" (twox+fourx), "b" (eightx-onex), "r" (eightx)
 	: "ctr", "memory");
 }
 
@@ -104,7 +124,6 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr,
 extern unsigned int get_slice_psize(struct mm_struct *mm,
 				    unsigned long addr);
 
-extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
 extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
 extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
 				  unsigned long len, unsigned int psize);

+ 2 - 4
arch/powerpc/include/asm/pgtable-ppc32.h

@@ -8,8 +8,6 @@
 #include <linux/threads.h>
 #include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
 
-extern unsigned long va_to_phys(unsigned long address);
-extern pte_t *va_to_pte(unsigned long address);
 extern unsigned long ioremap_bot;
 
 #ifdef CONFIG_44x
@@ -50,10 +48,10 @@ extern int icache_44x_need_flush;
 #define FIRST_USER_ADDRESS	0
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
 		(unsigned long long)pte_val(e))
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 /*
  * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary

+ 1 - 1
arch/powerpc/include/asm/pgtable-ppc64-4k.h

@@ -64,7 +64,7 @@
     (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
 
 #define pud_ERROR(e) \
-	printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
 
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */

+ 3 - 3
arch/powerpc/include/asm/pgtable-ppc64.h

@@ -328,11 +328,11 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
 #define pte_ERROR(e) \
-	printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
-	printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
 #define pgd_ERROR(e) \
-	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 
 /* Encode and de-code a swap entry */
 #define __swp_type(entry)	(((entry).val >> 1) & 0x3f)

+ 3 - 0
arch/powerpc/include/asm/pgtable.h

@@ -4,6 +4,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
+#include <linux/mmzone.h>
 #include <asm/processor.h>		/* For TASK_SIZE */
 #include <asm/mmu.h>
 #include <asm/page.h>
@@ -248,6 +249,8 @@ extern unsigned long empty_zero_page[];
 
 extern pgd_t swapper_pg_dir[];
 
+void limit_zone_pfn(enum zone_type zone, unsigned long max_pfn);
+int dma_pfn_limit_to_zone(u64 pfn_limit);
 extern void paging_init(void);
 
 /*

+ 6 - 6
arch/powerpc/include/asm/plpar_wrappers.h

@@ -273,7 +273,7 @@ static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
 static inline long enable_reloc_on_exceptions(void)
 {
 	/* mflags = 3: Exceptions at 0xC000000000004000 */
-	return plpar_set_mode(3, 3, 0, 0);
+	return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
 }
 
 /*
@@ -284,7 +284,7 @@ static inline long enable_reloc_on_exceptions(void)
  * returns H_SUCCESS.
  */
 static inline long disable_reloc_on_exceptions(void) {
-	return plpar_set_mode(0, 3, 0, 0);
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
 }
 
 /*
@@ -297,7 +297,7 @@ static inline long disable_reloc_on_exceptions(void) {
 static inline long enable_big_endian_exceptions(void)
 {
 	/* mflags = 0: big endian exceptions */
-	return plpar_set_mode(0, 4, 0, 0);
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0);
 }
 
 /*
@@ -310,17 +310,17 @@ static inline long enable_big_endian_exceptions(void)
 static inline long enable_little_endian_exceptions(void)
 {
 	/* mflags = 1: little endian exceptions */
-	return plpar_set_mode(1, 4, 0, 0);
+	return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
 }
 
 static inline long plapr_set_ciabr(unsigned long ciabr)
 {
-	return plpar_set_mode(0, 1, ciabr, 0);
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0);
 }
 
 static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
 {
-	return plpar_set_mode(0, 2, dawr0, dawrx0);
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
 }
 
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */

+ 31 - 0
arch/powerpc/include/asm/pnv-pci.h

@@ -0,0 +1,31 @@
+/*
+ * Copyright 2014 IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_PNV_PCI_H
+#define _ASM_PNV_PCI_H
+
+#include <linux/pci.h>
+#include <misc/cxl.h>
+
+int pnv_phb_to_cxl(struct pci_dev *dev);
+int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
+			   unsigned int virq);
+int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num);
+int pnv_cxl_get_irq_count(struct pci_dev *dev);
+struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev);
+
+#ifdef CONFIG_CXL_BASE
+int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs,
+			       struct pci_dev *dev, int num);
+void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
+				  struct pci_dev *dev);
+#endif
+
+#endif

+ 0 - 2
arch/powerpc/include/asm/prom.h

@@ -76,8 +76,6 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
 			 unsigned long *busno, unsigned long *phys,
 			 unsigned long *size);
 
-extern void kdump_move_device_tree(void);
-
 extern void of_instantiate_rtc(void);
 
 extern int of_get_ibm_chip_id(struct device_node *np);

+ 2 - 1
arch/powerpc/include/asm/reg.h

@@ -947,7 +947,7 @@
  * 32-bit 8xx:
  *	- SPRG0 scratch for exception vectors
  *	- SPRG1 scratch for exception vectors
- *	- SPRG2 apparently unused but initialized
+ *	- SPRG2 scratch for exception vectors
  *
  */
 #ifdef CONFIG_PPC64
@@ -1057,6 +1057,7 @@
 #ifdef CONFIG_8xx
 #define SPRN_SPRG_SCRATCH0	SPRN_SPRG0
 #define SPRN_SPRG_SCRATCH1	SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2	SPRN_SPRG2
 #endif
 
 

+ 0 - 1
arch/powerpc/include/asm/rio.h

@@ -13,7 +13,6 @@
 #ifndef ASM_PPC_RIO_H
 #define ASM_PPC_RIO_H
 
-extern void platform_rio_init(void);
 #ifdef CONFIG_FSL_RIO
 extern int fsl_rio_mcheck_exception(struct pt_regs *);
 #else

+ 2 - 3
arch/powerpc/include/asm/spu.h

@@ -27,6 +27,8 @@
 #include <linux/workqueue.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
 
 #define LS_SIZE (256 * 1024)
 #define LS_ADDR_MASK (LS_SIZE - 1)
@@ -277,9 +279,6 @@ void spu_remove_dev_attr(struct device_attribute *attr);
 int spu_add_dev_attr_group(struct attribute_group *attrs);
 void spu_remove_dev_attr_group(struct attribute_group *attrs);
 
-int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
-		unsigned long dsisr, unsigned *flt);
-
 /*
  * Notifier blocks:
  *

+ 62 - 0
arch/powerpc/include/asm/sstep.h

@@ -25,3 +25,65 @@ struct pt_regs;
 
 /* Emulate instructions that cause a transfer of control. */
 extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+
+enum instruction_type {
+	COMPUTE,		/* arith/logical/CR op, etc. */
+	LOAD,
+	LOAD_MULTI,
+	LOAD_FP,
+	LOAD_VMX,
+	LOAD_VSX,
+	STORE,
+	STORE_MULTI,
+	STORE_FP,
+	STORE_VMX,
+	STORE_VSX,
+	LARX,
+	STCX,
+	BRANCH,
+	MFSPR,
+	MTSPR,
+	CACHEOP,
+	BARRIER,
+	SYSCALL,
+	MFMSR,
+	MTMSR,
+	RFI,
+	INTERRUPT,
+	UNKNOWN
+};
+
+#define INSTR_TYPE_MASK	0x1f
+
+/* Load/store flags, ORed in with type */
+#define SIGNEXT		0x20
+#define UPDATE		0x40	/* matches bit in opcode 31 instructions */
+#define BYTEREV		0x80
+
+/* Cacheop values, ORed in with type */
+#define CACHEOP_MASK	0x700
+#define DCBST		0
+#define DCBF		0x100
+#define DCBTST		0x200
+#define DCBT		0x300
+#define ICBI		0x400
+
+/* Size field in type word */
+#define SIZE(n)		((n) << 8)
+#define GETSIZE(w)	((w) >> 8)
+
+#define MKOP(t, f, s)	((t) | (f) | SIZE(s))
+
+struct instruction_op {
+	int type;
+	int reg;
+	unsigned long val;
+	/* For LOAD/STORE/LARX/STCX */
+	unsigned long ea;
+	int update_reg;
+	/* For MFSPR */
+	int spr;
+};
+
+extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
+			 unsigned int instr);

+ 0 - 4
arch/powerpc/include/asm/tsi108.h

@@ -84,10 +84,6 @@
 extern u32 tsi108_pci_cfg_base;
 /* Exported functions */
 
-extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base);
-extern unsigned long tsi108_get_mem_size(void);
-extern unsigned long tsi108_get_cpu_clk(void);
-extern unsigned long tsi108_get_sdc_clk(void);
 extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn,
 				      int offset, int len, u32 val);
 extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn,

+ 0 - 1
arch/powerpc/include/asm/udbg.h

@@ -52,7 +52,6 @@ extern void __init udbg_init_44x_as1(void);
 extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
-extern void __init udbg_init_wsp(void);
 extern void __init udbg_init_memcons(void);
 extern void __init udbg_init_ehv_bc(void);
 extern void __init udbg_init_ps3gelic(void);

+ 98 - 14
arch/powerpc/include/asm/word-at-a-time.h

@@ -42,32 +42,65 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 
 #else
 
+#ifdef CONFIG_64BIT
+
+/* unused */
 struct word_at_a_time {
-	const unsigned long one_bits, high_bits;
 };
 
-#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+#define WORD_AT_A_TIME_CONSTANTS { }
 
-#ifdef CONFIG_64BIT
+/* This will give us 0xff for a NULL char and 0x00 elsewhere */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long ret;
+	unsigned long zero = 0;
 
-/* Alan Modra's little-endian strlen tail for 64-bit */
-#define create_zero_mask(mask) (mask)
+	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
+	*bits = ret;
 
-static inline unsigned long find_zero(unsigned long mask)
+	return ret;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+/* Alan Modra's little-endian strlen tail for 64-bit */
+static inline unsigned long create_zero_mask(unsigned long bits)
 {
 	unsigned long leading_zero_bits;
 	long trailing_zero_bit_mask;
 
-	asm ("addi %1,%2,-1\n\t"
-	     "andc %1,%1,%2\n\t"
-	     "popcntd %0,%1"
-	     : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
-	     : "r" (mask));
-	return leading_zero_bits >> 3;
+	asm("addi	%1,%2,-1\n\t"
+	    "andc	%1,%1,%2\n\t"
+	    "popcntd	%0,%1"
+		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
+		: "r" (bits));
+
+	return leading_zero_bits;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return mask >> 3;
+}
+
+/* This assumes that we never ask for an all 1s bitmask */
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+	return (1UL << mask) - 1;
 }
 
 #else	/* 32-bit case */
 
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
 /*
  * This is largely generic for little-endian machines, but the
  * optimal byte mask counting is probably going to be something
@@ -96,8 +129,6 @@ static inline unsigned long find_zero(unsigned long mask)
 	return count_masked_bytes(mask);
 }
 
-#endif
-
 /* Return nonzero if it has a zero */
 static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
 {
@@ -114,6 +145,59 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
+#endif /* CONFIG_64BIT */
+
+#endif /* __BIG_ENDIAN__ */
+
+/*
+ * We use load_unaligned_zero() in a selftest, which builds a userspace
+ * program. Some linker scripts seem to discard the .fixup section, so allow
+ * the test code to use a different section name.
+ */
+#ifndef FIXUP_SECTION
+#define FIXUP_SECTION ".fixup"
+#endif
+
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret, offset, tmp;
+
+	asm(
+	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
+	"2:\n"
+	".section " FIXUP_SECTION ",\"ax\"\n"
+	"3:	"
+#ifdef __powerpc64__
+	"clrrdi		%[tmp], %[addr], 3\n\t"
+	"clrlsldi	%[offset], %[addr], 61, 3\n\t"
+	"ld		%[ret], 0(%[tmp])\n\t"
+#ifdef __BIG_ENDIAN__
+	"sld		%[ret], %[ret], %[offset]\n\t"
+#else
+	"srd		%[ret], %[ret], %[offset]\n\t"
 #endif
+#else
+	"clrrwi		%[tmp], %[addr], 2\n\t"
+	"clrlslwi	%[offset], %[addr], 30, 3\n\t"
+	"lwz		%[ret], 0(%[tmp])\n\t"
+#ifdef __BIG_ENDIAN__
+	"slw		%[ret], %[ret], %[offset]\n\t"
+#else
+	"srw		%[ret], %[ret], %[offset]\n\t"
+#endif
+#endif
+	"b	2b\n"
+	".previous\n"
+	".section __ex_table,\"a\"\n\t"
+		PPC_LONG_ALIGN "\n\t"
+		PPC_LONG "1b,3b\n"
+	".previous"
+	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
+	: [addr] "b" (addr), "m" (*(unsigned long *)addr));
+
+	return ret;
+}
+
+#undef FIXUP_SECTION
 
 #endif /* _ASM_WORD_AT_A_TIME_H */

+ 1 - 0
arch/powerpc/include/asm/xics.h

@@ -29,6 +29,7 @@
 /* Native ICP */
 #ifdef CONFIG_PPC_ICP_NATIVE
 extern int icp_native_init(void);
+extern void icp_native_flush_interrupt(void);
 #else
 static inline int icp_native_init(void) { return -ENODEV; }
 #endif

+ 3 - 0
arch/powerpc/kernel/Makefile

@@ -93,6 +93,9 @@ obj-$(CONFIG_PPC32)		+= entry_32.o setup_32.o
 obj-$(CONFIG_PPC64)		+= dma-iommu.o iommu.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_MODULES)		+= ppc_ksyms.o
+ifeq ($(CONFIG_PPC32),y)
+obj-$(CONFIG_MODULES)		+= ppc_ksyms_32.o
+endif
 obj-$(CONFIG_BOOTX_TEXT)	+= btext.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o

+ 1 - 0
arch/powerpc/kernel/crash_dump.c

@@ -13,6 +13,7 @@
 
 #include <linux/crash_dump.h>
 #include <linux/bootmem.h>
+#include <linux/io.h>
 #include <linux/memblock.h>
 #include <asm/code-patching.h>
 #include <asm/kdump.h>

+ 6 - 2
arch/powerpc/kernel/dma-swiotlb.c

@@ -106,10 +106,14 @@ int __init swiotlb_setup_bus_notifier(void)
 	return 0;
 }
 
-void swiotlb_detect_4g(void)
+void __init swiotlb_detect_4g(void)
 {
-	if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
+	if ((memblock_end_of_DRAM() - 1) > 0xffffffff) {
 		ppc_swiotlb_enable = 1;
+#ifdef CONFIG_ZONE_DMA32
+		limit_zone_pfn(ZONE_DMA32, (1ULL << 32) >> PAGE_SHIFT);
+#endif
+	}
 }
 
 static int __init swiotlb_late_init(void)

+ 43 - 4
arch/powerpc/kernel/dma.c

@@ -15,6 +15,7 @@
 #include <asm/vio.h>
 #include <asm/bug.h>
 #include <asm/machdep.h>
+#include <asm/swiotlb.h>
 
 /*
  * Generic direct DMA implementation
@@ -25,6 +26,18 @@
  * default the offset is PCI_DRAM_OFFSET.
  */
 
+static u64 __maybe_unused get_pfn_limit(struct device *dev)
+{
+	u64 pfn = (dev->coherent_dma_mask >> PAGE_SHIFT) + 1;
+	struct dev_archdata __maybe_unused *sd = &dev->archdata;
+
+#ifdef CONFIG_SWIOTLB
+	if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops)
+		pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT);
+#endif
+
+	return pfn;
+}
 
 void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 				dma_addr_t *dma_handle, gfp_t flag,
@@ -40,6 +53,26 @@ void *dma_direct_alloc_coherent(struct device *dev, size_t size,
 #else
 	struct page *page;
 	int node = dev_to_node(dev);
+	u64 pfn = get_pfn_limit(dev);
+	int zone;
+
+	zone = dma_pfn_limit_to_zone(pfn);
+	if (zone < 0) {
+		dev_err(dev, "%s: No suitable zone for pfn %#llx\n",
+			__func__, pfn);
+		return NULL;
+	}
+
+	switch (zone) {
+	case ZONE_DMA:
+		flag |= GFP_DMA;
+		break;
+#ifdef CONFIG_ZONE_DMA32
+	case ZONE_DMA32:
+		flag |= GFP_DMA32;
+		break;
+#endif
+	};
 
 	/* ignore region specifiers */
 	flag  &= ~(__GFP_HIGHMEM);
@@ -202,6 +235,7 @@ int __dma_set_mask(struct device *dev, u64 dma_mask)
 	*dev->dma_mask = dma_mask;
 	return 0;
 }
+
 int dma_set_mask(struct device *dev, u64 dma_mask)
 {
 	if (ppc_md.dma_set_mask)
@@ -210,13 +244,10 @@ int dma_set_mask(struct device *dev, u64 dma_mask)
 }
 EXPORT_SYMBOL(dma_set_mask);
 
-u64 dma_get_required_mask(struct device *dev)
+u64 __dma_get_required_mask(struct device *dev)
 {
 	struct dma_map_ops *dma_ops = get_dma_ops(dev);
 
-	if (ppc_md.dma_get_required_mask)
-		return ppc_md.dma_get_required_mask(dev);
-
 	if (unlikely(dma_ops == NULL))
 		return 0;
 
@@ -225,6 +256,14 @@ u64 dma_get_required_mask(struct device *dev)
 
 	return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
 }
+
+u64 dma_get_required_mask(struct device *dev)
+{
+	if (ppc_md.dma_get_required_mask)
+		return ppc_md.dma_get_required_mask(dev);
+
+	return __dma_get_required_mask(dev);
+}
 EXPORT_SYMBOL_GPL(dma_get_required_mask);
 
 static int __init dma_init(void)

+ 210 - 59
arch/powerpc/kernel/eeh.c

@@ -117,7 +117,7 @@ static DEFINE_MUTEX(eeh_dev_mutex);
  * not dynamically alloced, so that it ends up in RMO where RTAS
  * can access it.
  */
-#define EEH_PCI_REGS_LOG_LEN 4096
+#define EEH_PCI_REGS_LOG_LEN 8192
 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
 
 /*
@@ -148,16 +148,12 @@ static int __init eeh_setup(char *str)
 }
 __setup("eeh=", eeh_setup);
 
-/**
- * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @edev: device to report data for
- * @buf: point to buffer in which to log
- * @len: amount of room in buffer
- *
- * This routine captures assorted PCI configuration space data,
- * and puts them into a buffer for RTAS error logging.
+/*
+ * This routine captures assorted PCI configuration space data
+ * for the indicated PCI device, and puts them into a buffer
+ * for RTAS error logging.
  */
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
+static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 {
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 	u32 cfg;
@@ -255,6 +251,19 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
 	return n;
 }
 
+static void *eeh_dump_pe_log(void *data, void *flag)
+{
+	struct eeh_pe *pe = data;
+	struct eeh_dev *edev, *tmp;
+	size_t *plen = flag;
+
+	eeh_pe_for_each_dev(pe, edev, tmp)
+		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
+					  EEH_PCI_REGS_LOG_LEN - *plen);
+
+	return NULL;
+}
+
 /**
  * eeh_slot_error_detail - Generate combined log including driver log and error log
  * @pe: EEH PE
@@ -268,7 +277,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
-	struct eeh_dev *edev, *tmp;
 
 	/*
 	 * When the PHB is fenced or dead, it's pointless to collect
@@ -286,10 +294,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 		eeh_pe_restore_bars(pe);
 
 		pci_regs_buf[0] = 0;
-		eeh_pe_for_each_dev(pe, edev, tmp) {
-			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
-						      EEH_PCI_REGS_LOG_LEN - loglen);
-		}
+		eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
 	}
 
 	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
@@ -410,7 +415,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
 	}
 	dn = eeh_dev_to_of_node(edev);
 	dev = eeh_dev_to_pci_dev(edev);
-	pe = edev->pe;
+	pe = eeh_dev_to_pe(edev);
 
 	/* Access to IO BARs might get this far and still not want checking. */
 	if (!pe) {
@@ -542,17 +547,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
 
 /**
  * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
- * @token: I/O token, should be address in the form 0xA....
- * @val: value, should be all 1's (XXX why do we need this arg??)
+ * @token: I/O address
  *
- * Check for an EEH failure at the given token address.  Call this
+ * Check for an EEH failure at the given I/O address. Call this
  * routine if the result of a read was all 0xff's and you want to
- * find out if this is due to an EEH slot freeze event.  This routine
+ * find out if this is due to an EEH slot freeze event. This routine
  * will query firmware for the EEH status.
  *
  * Note this routine is safe to call in an interrupt context.
  */
-unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+int eeh_check_failure(const volatile void __iomem *token)
 {
 	unsigned long addr;
 	struct eeh_dev *edev;
@@ -562,13 +566,11 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
 	edev = eeh_addr_cache_get_dev(addr);
 	if (!edev) {
 		eeh_stats.no_device++;
-		return val;
+		return 0;
 	}
 
-	eeh_dev_check_failure(edev);
-	return val;
+	return eeh_dev_check_failure(edev);
 }
-
 EXPORT_SYMBOL(eeh_check_failure);
 
 
@@ -582,25 +584,51 @@ EXPORT_SYMBOL(eeh_check_failure);
  */
 int eeh_pci_enable(struct eeh_pe *pe, int function)
 {
-	int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	int active_flag, rc;
 
 	/*
 	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
 	 * Also, it's pointless to enable them on unfrozen PE. So
-	 * we have the check here.
+	 * we have to check before enabling IO or DMA.
 	 */
-	if (function == EEH_OPT_THAW_MMIO ||
-	    function == EEH_OPT_THAW_DMA) {
+	switch (function) {
+	case EEH_OPT_THAW_MMIO:
+		active_flag = EEH_STATE_MMIO_ACTIVE;
+		break;
+	case EEH_OPT_THAW_DMA:
+		active_flag = EEH_STATE_DMA_ACTIVE;
+		break;
+	case EEH_OPT_DISABLE:
+	case EEH_OPT_ENABLE:
+	case EEH_OPT_FREEZE_PE:
+		active_flag = 0;
+		break;
+	default:
+		pr_warn("%s: Invalid function %d\n",
+			__func__, function);
+		return -EINVAL;
+	}
+
+	/*
+	 * Check if IO or DMA has been enabled before
+	 * enabling them.
+	 */
+	if (active_flag) {
 		rc = eeh_ops->get_state(pe, NULL);
 		if (rc < 0)
 			return rc;
 
-		/* Needn't to enable or already enabled */
-		if ((rc == EEH_STATE_NOT_SUPPORT) ||
-		    ((rc & flags) == flags))
+		/* Needn't enable it at all */
+		if (rc == EEH_STATE_NOT_SUPPORT)
+			return 0;
+
+		/* It's already enabled */
+		if (rc & active_flag)
 			return 0;
 	}
 
+
+	/* Issue the request */
 	rc = eeh_ops->set_option(pe, function);
 	if (rc)
 		pr_warn("%s: Unexpected state change %d on "
@@ -608,17 +636,17 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 			__func__, function, pe->phb->global_number,
 			pe->addr, rc);
 
-	rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
-	if (rc <= 0)
-		return rc;
+	/* Check if the request is finished successfully */
+	if (active_flag) {
+		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+		if (rc <= 0)
+			return rc;
 
-	if ((function == EEH_OPT_THAW_MMIO) &&
-	    (rc & EEH_STATE_MMIO_ENABLED))
-		return 0;
+		if (rc & active_flag)
+			return 0;
 
-	if ((function == EEH_OPT_THAW_DMA) &&
-	    (rc & EEH_STATE_DMA_ENABLED))
-		return 0;
+		return -EIO;
+	}
 
 	return rc;
 }
@@ -634,7 +662,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-	struct eeh_pe *pe = edev->pe;
+	struct eeh_pe *pe = eeh_dev_to_pe(edev);
 
 	if (!pe) {
 		pr_err("%s: No PE found on PCI device %s\n",
@@ -645,14 +673,18 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
 	switch (state) {
 	case pcie_deassert_reset:
 		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		break;
 	case pcie_hot_reset:
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		eeh_ops->reset(pe, EEH_RESET_HOT);
 		break;
 	case pcie_warm_reset:
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
 		break;
 	default:
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		return -EINVAL;
 	};
 
@@ -1141,6 +1173,85 @@ void eeh_remove_device(struct pci_dev *dev)
 	edev->mode &= ~EEH_DEV_SYSFS;
 }
 
+int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
+{
+	int ret;
+
+	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+	if (ret) {
+		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
+			__func__, ret, pe->phb->global_number, pe->addr);
+		return ret;
+	}
+
+	/* Clear software isolated state */
+	if (sw_state && (pe->state & EEH_PE_ISOLATED))
+		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+
+	return ret;
+}
+
+
+static struct pci_device_id eeh_reset_ids[] = {
+	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE     */
+	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
+	{ 0 }
+};
+
+static int eeh_pe_change_owner(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	struct pci_device_id *id;
+	int flags, ret;
+
+	/* Check PE state */
+	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+	ret = eeh_ops->get_state(pe, NULL);
+	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
+		return 0;
+
+	/* Unfrozen PE, nothing to do */
+	if ((ret & flags) == flags)
+		return 0;
+
+	/* Frozen PE, check if it needs PE level reset */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
+			if (id->vendor != PCI_ANY_ID &&
+			    id->vendor != pdev->vendor)
+				continue;
+			if (id->device != PCI_ANY_ID &&
+			    id->device != pdev->device)
+				continue;
+			if (id->subvendor != PCI_ANY_ID &&
+			    id->subvendor != pdev->subsystem_vendor)
+				continue;
+			if (id->subdevice != PCI_ANY_ID &&
+			    id->subdevice != pdev->subsystem_device)
+				continue;
+
+			goto reset;
+		}
+	}
+
+	return eeh_unfreeze_pe(pe, true);
+
+reset:
+	return eeh_pe_reset_and_recover(pe);
+}
+
 /**
  * eeh_dev_open - Increase count of pass through devices for PE
  * @pdev: PCI device
@@ -1153,6 +1264,7 @@ void eeh_remove_device(struct pci_dev *dev)
 int eeh_dev_open(struct pci_dev *pdev)
 {
 	struct eeh_dev *edev;
+	int ret = -ENODEV;
 
 	mutex_lock(&eeh_dev_mutex);
 
@@ -1165,6 +1277,16 @@ int eeh_dev_open(struct pci_dev *pdev)
 	if (!edev || !edev->pe)
 		goto out;
 
+	/*
+	 * The PE might have been put into frozen state, but we
+	 * didn't detect that yet. The passed through PCI devices
+	 * in frozen PE won't work properly. Clear the frozen state
+	 * in advance.
+	 */
+	ret = eeh_pe_change_owner(edev->pe);
+	if (ret)
+		goto out;
+
 	/* Increase PE's pass through count */
 	atomic_inc(&edev->pe->pass_dev_cnt);
 	mutex_unlock(&eeh_dev_mutex);
@@ -1172,7 +1294,7 @@ int eeh_dev_open(struct pci_dev *pdev)
 	return 0;
 out:
 	mutex_unlock(&eeh_dev_mutex);
-	return -ENODEV;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(eeh_dev_open);
 
@@ -1202,6 +1324,7 @@ void eeh_dev_release(struct pci_dev *pdev)
 	/* Decrease PE's pass through count */
 	atomic_dec(&edev->pe->pass_dev_cnt);
 	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
+	eeh_pe_change_owner(edev->pe);
 out:
 	mutex_unlock(&eeh_dev_mutex);
 }
@@ -1281,8 +1404,10 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
 	 */
 	switch (option) {
 	case EEH_OPT_ENABLE:
-		if (eeh_enabled())
+		if (eeh_enabled()) {
+			ret = eeh_pe_change_owner(pe);
 			break;
+		}
 		ret = -EIO;
 		break;
 	case EEH_OPT_DISABLE:
@@ -1294,7 +1419,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
 			break;
 		}
 
-		ret = eeh_ops->set_option(pe, option);
+		ret = eeh_pci_enable(pe, option);
 		break;
 	default:
 		pr_debug("%s: Option %d out of range (%d, %d)\n",
@@ -1345,6 +1470,36 @@ int eeh_pe_get_state(struct eeh_pe *pe)
 }
 EXPORT_SYMBOL_GPL(eeh_pe_get_state);
 
+static int eeh_pe_reenable_devices(struct eeh_pe *pe)
+{
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
+	int ret = 0;
+
+	/* Restore config space */
+	eeh_pe_restore_bars(pe);
+
+	/*
+	 * Reenable PCI devices as the devices passed
+	 * through are always enabled before the reset.
+	 */
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		ret = pci_reenable_device(pdev);
+		if (ret) {
+			pr_warn("%s: Failure %d reenabling %s\n",
+				__func__, ret, pci_name(pdev));
+			return ret;
+		}
+	}
+
+	/* The PE is still in frozen state */
+	return eeh_unfreeze_pe(pe, true);
+}
+
 /**
  * eeh_pe_reset - Issue PE reset according to specified type
  * @pe: EEH PE
@@ -1368,23 +1523,22 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
 	switch (option) {
 	case EEH_RESET_DEACTIVATE:
 		ret = eeh_ops->reset(pe, option);
+		eeh_pe_state_clear(pe, EEH_PE_RESET);
 		if (ret)
 			break;
 
-		/*
-		 * The PE is still in frozen state and we need to clear
-		 * that. It's good to clear frozen state after deassert
-		 * to avoid messy IO access during reset, which might
-		 * cause recursive frozen PE.
-		 */
-		ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
-		if (!ret)
-			ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
-		if (!ret)
-			eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+		ret = eeh_pe_reenable_devices(pe);
 		break;
 	case EEH_RESET_HOT:
 	case EEH_RESET_FUNDAMENTAL:
+		/*
+		 * Proactively freeze the PE to drop all MMIO access
+		 * during reset, which should be banned as it's always
+		 * cause recursive EEH error.
+		 */
+		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+
+		eeh_pe_state_mark(pe, EEH_PE_RESET);
 		ret = eeh_ops->reset(pe, option);
 		break;
 	default:
@@ -1413,9 +1567,6 @@ int eeh_pe_configure(struct eeh_pe *pe)
 	if (!pe)
 		return -ENODEV;
 
-	/* Restore config space for the affected devices */
-	eeh_pe_restore_bars(pe);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(eeh_pe_configure);

+ 91 - 15
arch/powerpc/kernel/eeh_driver.c

@@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
 	return false;
 }
 
+static void *eeh_dev_save_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return NULL;
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return NULL;
+
+	pci_save_state(pdev);
+	return NULL;
+}
+
 /**
  * eeh_report_error - Report pci error to each device driver
  * @data: eeh device
@@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata)
 	return NULL;
 }
 
+static void *eeh_dev_restore_state(void *data, void *userdata)
+{
+	struct eeh_dev *edev = data;
+	struct pci_dev *pdev;
+
+	if (!edev)
+		return NULL;
+
+	pdev = eeh_dev_to_pci_dev(edev);
+	if (!pdev)
+		return NULL;
+
+	pci_restore_state(pdev);
+	return NULL;
+}
+
 /**
  * eeh_report_resume - Tell device to resume normal operations
  * @data: eeh device
@@ -450,38 +482,82 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
 static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
-	int i, rc;
+	bool *clear_sw_state = flag;
+	int i, rc = 1;
 
-	for (i = 0; i < 3; i++) {
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
-		if (rc)
-			continue;
-		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
-		if (!rc)
-			break;
-	}
+	for (i = 0; rc && i < 3; i++)
+		rc = eeh_unfreeze_pe(pe, clear_sw_state);
 
-	/* The PE has been isolated, clear it */
+	/* Stop immediately on any errors */
 	if (rc) {
-		pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
-			__func__, pe->phb->global_number, pe->addr, rc);
+		pr_warn("%s: Failure %d unfreezing PHB#%x-PE#%x\n",
+			__func__, rc, pe->phb->global_number, pe->addr);
 		return (void *)pe;
 	}
 
 	return NULL;
 }
 
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
+				     bool clear_sw_state)
 {
 	void *rc;
 
-	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
+	rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
 	if (!rc)
 		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 
 	return rc ? -EIO : 0;
 }
 
+int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+{
+	int result, ret;
+
+	/* Bail if the PE is being recovered */
+	if (pe->state & EEH_PE_RECOVERING)
+		return 0;
+
+	/* Put the PE into recovery mode */
+	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
+	/* Save states */
+	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+	/* Report error */
+	eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+
+	/* Issue reset */
+	eeh_pe_state_mark(pe, EEH_PE_RESET);
+	ret = eeh_reset_pe(pe);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
+		return ret;
+	}
+	eeh_pe_state_clear(pe, EEH_PE_RESET);
+
+	/* Unfreeze the PE */
+	ret = eeh_clear_pe_frozen_state(pe, true);
+	if (ret) {
+		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+		return ret;
+	}
+
+	/* Notify completion of reset */
+	eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+
+	/* Restore device state */
+	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
+
+	/* Resume */
+	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+
+	/* Clear recovery mode */
+	eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
+
+	return 0;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -540,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	eeh_pe_state_clear(pe, EEH_PE_RESET);
 
 	/* Clear frozen state */
-	rc = eeh_clear_pe_frozen_state(pe);
+	rc = eeh_clear_pe_frozen_state(pe, false);
 	if (rc)
 		return rc;
 

+ 19 - 4
arch/powerpc/kernel/eeh_pe.c

@@ -428,7 +428,7 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 	}
 
 	/* Remove the EEH device */
-	pe = edev->pe;
+	pe = eeh_dev_to_pe(edev);
 	edev->pe = NULL;
 	list_del(&edev->list);
 
@@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
+	struct eeh_dev *edev, *tmp;
+	struct pci_dev *pdev;
 
 	/* Keep the state of permanently removed PE intact */
 	if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
@@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
 
 	pe->state &= ~state;
 
-	/* Clear check count since last isolation */
-	if (state & EEH_PE_ISOLATED)
-		pe->check_count = 0;
+	/*
+	 * Special treatment on clearing isolated state. Clear
+	 * check count since last isolation and put all affected
+	 * devices to normal state.
+	 */
+	if (!(state & EEH_PE_ISOLATED))
+		return NULL;
+
+	pe->check_count = 0;
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
+		if (!pdev)
+			continue;
+
+		pdev->error_state = pci_channel_io_normal;
+	}
 
 	return NULL;
 }

+ 40 - 1
arch/powerpc/kernel/eeh_sysfs.c

@@ -54,6 +54,43 @@ EEH_SHOW_ATTR(eeh_mode,            mode,            "0x%x");
 EEH_SHOW_ATTR(eeh_config_addr,     config_addr,     "0x%x");
 EEH_SHOW_ATTR(eeh_pe_config_addr,  pe_config_addr,  "0x%x");
 
+static ssize_t eeh_pe_state_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+	int state;
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+
+	state = eeh_ops->get_state(edev->pe, NULL);
+	return sprintf(buf, "%0x08x %0x08x\n",
+		       state, edev->pe->state);
+}
+
+static ssize_t eeh_pe_state_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+	if (!edev || !edev->pe)
+		return -ENODEV;
+
+	/* Nothing to do if it's not frozen */
+	if (!(edev->pe->state & EEH_PE_ISOLATED))
+		return count;
+
+	if (eeh_unfreeze_pe(edev->pe, true))
+		return -EIO;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(eeh_pe_state);
+
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -68,9 +105,10 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
 	if (rc)
-		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+		pr_warn("EEH: Unable to create sysfs entries\n");
 	else if (edev)
 		edev->mode |= EEH_DEV_SYSFS;
 }
@@ -92,6 +130,7 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
 
 	if (edev)
 		edev->mode &= ~EEH_DEV_SYSFS;

+ 56 - 94
arch/powerpc/kernel/head_8xx.S

@@ -104,12 +104,15 @@ turn_on_mmu:
  * task's thread_struct.
  */
 #define EXCEPTION_PROLOG	\
-	mtspr	SPRN_SPRG_SCRATCH0,r10;	\
-	mtspr	SPRN_SPRG_SCRATCH1,r11;	\
-	mfcr	r10;		\
+	EXCEPTION_PROLOG_0;	\
 	EXCEPTION_PROLOG_1;	\
 	EXCEPTION_PROLOG_2
 
+#define EXCEPTION_PROLOG_0	\
+	mtspr	SPRN_SPRG_SCRATCH0,r10;	\
+	mtspr	SPRN_SPRG_SCRATCH1,r11;	\
+	mfcr	r10
+
 #define EXCEPTION_PROLOG_1	\
 	mfspr	r11,SPRN_SRR1;		/* check whether user or kernel */ \
 	andi.	r11,r11,MSR_PR;	\
@@ -144,6 +147,14 @@ turn_on_mmu:
 	SAVE_4GPRS(3, r11);	\
 	SAVE_2GPRS(7, r11)
 
+/*
+ * Exception exit code.
+ */
+#define EXCEPTION_EPILOG_0	\
+	mtcr	r10;		\
+	mfspr	r10,SPRN_SPRG_SCRATCH0;	\
+	mfspr	r11,SPRN_SPRG_SCRATCH1
+
 /*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
@@ -293,16 +304,8 @@ InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
 	stw	r3, 8(r0)
 #endif
-	DO_8xx_CPU6(0x3f80, r3)
-	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
-	mfcr	r10
-#ifdef CONFIG_8xx_CPU6
-	stw	r10, 0(r0)
-	stw	r11, 4(r0)
-#else
-	mtspr	SPRN_DAR, r10
-	mtspr	SPRN_SPRG2, r11
-#endif
+	EXCEPTION_PROLOG_0
+	mtspr	SPRN_SPRG_SCRATCH2, r10
 	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
 	addi	r11, r10, 0x1000
@@ -359,18 +362,11 @@ InstructionTLBMiss:
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
 
 	/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-	mfspr	r10, SPRN_DAR
-	mtcr	r10
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	mfspr	r11, SPRN_SPRG2
-#else
-	lwz	r11, 0(r0)
-	mtcr	r11
-	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
-	mfspr	r10, SPRN_M_TW
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	EXCEPTION_EPILOG_0
 	rfi
 2:
 	mfspr	r11, SPRN_SRR1
@@ -381,19 +377,11 @@ InstructionTLBMiss:
 	mtspr	SPRN_SRR1, r11
 
 	/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-	mfspr	r10, SPRN_DAR
-	mtcr	r10
-	li	r11, 0x00f0
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	mfspr	r11, SPRN_SPRG2
-#else
-	lwz	r11, 0(r0)
-	mtcr	r11
-	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
-	mfspr	r10, SPRN_M_TW
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	EXCEPTION_EPILOG_0
 	b	InstructionAccess
 
 	. = 0x1200
@@ -401,16 +389,8 @@ DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
 	stw	r3, 8(r0)
 #endif
-	DO_8xx_CPU6(0x3f80, r3)
-	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
-	mfcr	r10
-#ifdef CONFIG_8xx_CPU6
-	stw	r10, 0(r0)
-	stw	r11, 4(r0)
-#else
-	mtspr	SPRN_DAR, r10
-	mtspr	SPRN_SPRG2, r11
-#endif
+	EXCEPTION_PROLOG_0
+	mtspr	SPRN_SPRG_SCRATCH2, r10
 	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
 
 	/* If we are faulting a kernel address, we have to use the
@@ -483,19 +463,12 @@ DataStoreTLBMiss:
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
 
 	/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-	mfspr	r10, SPRN_DAR
-	mtcr	r10
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	mfspr	r11, SPRN_SPRG2
-#else
-	mtspr	SPRN_DAR, r11	/* Tag DAR */
-	lwz	r11, 0(r0)
-	mtcr	r11
-	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
 	lwz	r3, 8(r0)
 #endif
-	mfspr	r10, SPRN_M_TW
+	mtspr	SPRN_DAR, r11	/* Tag DAR */
+	mfspr	r10, SPRN_SPRG_SCRATCH2
+	EXCEPTION_EPILOG_0
 	rfi
 
 /* This is an instruction TLB error on the MPC8xx.  This could be due
@@ -507,35 +480,18 @@ InstructionTLBError:
 	b	InstructionAccess
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
- * many reasons, including a dirty update to a pte.  We can catch that
- * one here, but anything else is an error.  First, we track down the
- * Linux pte.  If it is valid, write access is allowed, but the
- * page dirty bit is not set, we will set it and reload the TLB.  For
- * any other case, we bail out to a higher level function that can
- * handle it.
+ * many reasons, including a dirty update to a pte.  We bail out to
+ * a higher level function that can handle it.
  */
 	. = 0x1400
 DataTLBError:
-#ifdef CONFIG_8xx_CPU6
-	stw	r3, 8(r0)
-#endif
-	DO_8xx_CPU6(0x3f80, r3)
-	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
-	mfcr	r10
-	stw	r10, 0(r0)
-	stw	r11, 4(r0)
+	EXCEPTION_PROLOG_0
 
-	mfspr	r10, SPRN_DAR
-	cmpwi	cr0, r10, 0x00f0
+	mfspr	r11, SPRN_DAR
+	cmpwi	cr0, r11, 0x00f0
 	beq-	FixupDAR	/* must be a buggy dcbX, icbi insn. */
-DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */
-	mfspr	r10, SPRN_M_TW	/* Restore registers */
-	lwz	r11, 0(r0)
-	mtcr	r11
-	lwz	r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
-	lwz	r3, 8(r0)
-#endif
+DARFixed:/* Return from dcbx instruction bug workaround */
+	EXCEPTION_EPILOG_0
 	b	DataAccess
 
 	EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
@@ -559,11 +515,15 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
  * by decoding the registers used by the dcbx instruction and adding them.
- * DAR is set to the calculated address and r10 also holds the EA on exit.
+ * DAR is set to the calculated address.
  */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
 FixupDAR:/* Entry point for dcbx workaround. */
+#ifdef CONFIG_8xx_CPU6
+	stw	r3, 8(r0)
+#endif
+	mtspr	SPRN_SPRG_SCRATCH2, r10
 	/* fetch instruction from memory. */
 	mfspr	r10, SPRN_SRR0
 	andis.	r11, r10, 0x8000	/* Address >= 0x80000000 */
@@ -579,16 +539,17 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
 	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
 	lwz	r11, 0(r11)	/* Get the pte */
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)	/* restore r3 from memory */
+#endif
 	/* concat physical page address(r11) and page offset(r10) */
 	rlwimi	r11, r10, 0, 20, 31
 	lwz	r11,0(r11)
 /* Check if it really is a dcbx instruction. */
 /* dcbt and dcbtst does not generate DTLB Misses/Errors,
  * no need to include them here */
-	srwi	r10, r11, 26	/* check if major OP code is 31 */
-	cmpwi	cr0, r10, 31
-	bne-	141f
-	rlwinm	r10, r11, 0, 21, 30
+	xoris	r10, r11, 0x7c00	/* check if major OP code is 31 */
+	rlwinm	r10, r10, 0, 21, 5
 	cmpwi	cr0, r10, 2028	/* Is dcbz? */
 	beq+	142f
 	cmpwi	cr0, r10, 940	/* Is dcbi? */
@@ -599,16 +560,13 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	beq+	142f
 	cmpwi	cr0, r10, 1964	/* Is icbi? */
 	beq+	142f
-141:	mfspr	r10, SPRN_DAR	/* r10 must hold DAR at exit */
+141:	mfspr	r10,SPRN_SPRG_SCRATCH2
 	b	DARFixed	/* Nope, go back to normal TLB processing */
 
 144:	mfspr	r10, SPRN_DSISR
 	rlwinm	r10, r10,0,7,5	/* Clear store bit for buggy dcbst insn */
 	mtspr	SPRN_DSISR, r10
 142:	/* continue, it was a dcbx, dcbi instruction. */
-#ifdef CONFIG_8xx_CPU6
-	lwz	r3, 8(r0)	/* restore r3 from memory */
-#endif
 #ifndef NO_SELF_MODIFYING_CODE
 	andis.	r10,r11,0x1f	/* test if reg RA is r0 */
 	li	r10,modified_instr@l
@@ -619,14 +577,15 @@ FixupDAR:/* Entry point for dcbx workaround. */
 	stw	r11,0(r10)	/* store add/and instruction */
 	dcbf	0,r10		/* flush new instr. to memory. */
 	icbi	0,r10		/* invalidate instr. cache line */
-	lwz	r11, 4(r0)	/* restore r11 from memory */
-	mfspr	r10, SPRN_M_TW	/* restore r10 from M_TW */
+	mfspr	r11, SPRN_SPRG_SCRATCH1	/* restore r11 */
+	mfspr	r10, SPRN_SPRG_SCRATCH0	/* restore r10 */
 	isync			/* Wait until new instr is loaded from memory */
 modified_instr:
 	.space	4		/* this is where the add instr. is stored */
 	bne+	143f
 	subf	r10,r0,r10	/* r10=r10-r0, only if reg RA is r0 */
 143:	mtdar	r10		/* store faulting EA in DAR */
+	mfspr	r10,SPRN_SPRG_SCRATCH2
 	b	DARFixed	/* Go back to normal TLB handling */
 #else
 	mfctr	r10
@@ -680,13 +639,16 @@ modified_instr:
 	mfdar	r11
 	mtctr	r11			/* restore ctr reg from DAR */
 	mtdar	r10			/* save fault EA to DAR */
+	mfspr	r10,SPRN_SPRG_SCRATCH2
 	b	DARFixed		/* Go back to normal TLB handling */
 
 	/* special handling for r10,r11 since these are modified already */
-153:	lwz	r11, 4(r0)	/* load r11 from memory */
-	b	155f
-154:	mfspr	r11, SPRN_M_TW	/* load r10 from M_TW */
-155:	add	r10, r10, r11	/* add it */
+153:	mfspr	r11, SPRN_SPRG_SCRATCH1	/* load r11 from SPRN_SPRG_SCRATCH1 */
+	add	r10, r10, r11	/* add it */
+	mfctr	r11		/* restore r11 */
+	b	151b
+154:	mfspr	r11, SPRN_SPRG_SCRATCH0	/* load r10 from SPRN_SPRG_SCRATCH0 */
+	add	r10, r10, r11	/* add it */
 	mfctr	r11		/* restore r11 */
 	b	151b
 #endif

+ 1 - 1
arch/powerpc/kernel/hw_breakpoint.c

@@ -293,7 +293,7 @@ out:
 /*
  * Handle single-step exceptions following a DABR hit.
  */
-int __kprobes single_step_dabr_instruction(struct die_args *args)
+static int __kprobes single_step_dabr_instruction(struct die_args *args)
 {
 	struct pt_regs *regs = args->regs;
 	struct perf_event *bp = NULL;

+ 1 - 1
arch/powerpc/kernel/ibmebus.c

@@ -55,7 +55,7 @@ static struct device ibmebus_bus_device = { /* fake "parent" device */
 struct bus_type ibmebus_bus_type;
 
 /* These devices will automatically be added to the bus during init */
-static struct of_device_id __initdata ibmebus_matches[] = {
+static const struct of_device_id ibmebus_matches[] __initconst = {
 	{ .compatible = "IBM,lhca" },
 	{ .compatible = "IBM,lhea" },
 	{},

+ 1 - 1
arch/powerpc/kernel/idle_power7.S

@@ -73,7 +73,7 @@ _GLOBAL(power7_powersave_common)
 
 	/* Check if something happened while soft-disabled */
 	lbz	r0,PACAIRQHAPPENED(r13)
-	cmpwi	cr0,r0,0
+	andi.	r0,r0,~PACA_IRQ_HARD_DIS@l
 	beq	1f
 	cmpwi	cr0,r4,0
 	beq	1f

+ 3 - 3
arch/powerpc/kernel/irq.c

@@ -444,13 +444,13 @@ void migrate_irqs(void)
 
 		cpumask_and(mask, data->affinity, map);
 		if (cpumask_any(mask) >= nr_cpu_ids) {
-			printk("Breaking affinity for irq %i\n", irq);
+			pr_warn("Breaking affinity for irq %i\n", irq);
 			cpumask_copy(mask, map);
 		}
 		if (chip->irq_set_affinity)
 			chip->irq_set_affinity(data, mask, true);
 		else if (desc->action && !(warned++))
-			printk("Cannot set affinity for irq %i\n", irq);
+			pr_err("Cannot set affinity for irq %i\n", irq);
 	}
 
 	free_cpumask_var(mask);
@@ -470,7 +470,7 @@ static inline void check_stack_overflow(void)
 
 	/* check for stack overflow: is there less than 2KB free? */
 	if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
-		printk("do_IRQ: stack overflow: %ld\n",
+		pr_err("do_IRQ: stack overflow: %ld\n",
 			sp - sizeof(struct thread_info));
 		dump_stack();
 	}

+ 1 - 1
arch/powerpc/kernel/legacy_serial.c

@@ -35,7 +35,7 @@ static struct legacy_serial_info {
 	phys_addr_t			taddr;
 } legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
 
-static struct of_device_id legacy_serial_parents[] __initdata = {
+static const struct of_device_id legacy_serial_parents[] __initconst = {
 	{.type = "soc",},
 	{.type = "tsi-bridge",},
 	{.type = "opb", },

+ 14 - 17
arch/powerpc/kernel/module_32.c

@@ -15,6 +15,9 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/moduleloader.h>
 #include <linux/elf.h>
@@ -28,12 +31,6 @@
 #include <linux/sort.h>
 #include <asm/setup.h>
 
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
-
 /* Count how many different relocations (different symbol, different
    addend) */
 static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
@@ -121,8 +118,8 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
 			continue;
 
 		if (sechdrs[i].sh_type == SHT_RELA) {
-			DEBUGP("Found relocations in section %u\n", i);
-			DEBUGP("Ptr: %p.  Number: %u\n",
+			pr_debug("Found relocations in section %u\n", i);
+			pr_debug("Ptr: %p.  Number: %u\n",
 			       (void *)hdr + sechdrs[i].sh_offset,
 			       sechdrs[i].sh_size / sizeof(Elf32_Rela));
 
@@ -161,7 +158,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
 			me->arch.core_plt_section = i;
 	}
 	if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
-		printk("Module doesn't contain .plt or .init.plt sections.\n");
+		pr_err("Module doesn't contain .plt or .init.plt sections.\n");
 		return -ENOEXEC;
 	}
 
@@ -189,7 +186,7 @@ static uint32_t do_plt_call(void *location,
 {
 	struct ppc_plt_entry *entry;
 
-	DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+	pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
 	/* Init, or core PLT? */
 	if (location >= mod->module_core
 	    && location < mod->module_core + mod->core_size)
@@ -208,7 +205,7 @@ static uint32_t do_plt_call(void *location,
 	entry->jump[2] = 0x7d8903a6;                    /* mtctr r12 */
 	entry->jump[3] = 0x4e800420;			/* bctr */
 
-	DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
+	pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
 	return (uint32_t)entry;
 }
 
@@ -224,7 +221,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	uint32_t *location;
 	uint32_t value;
 
-	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+	pr_debug("Applying ADD relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
 		/* This is where to make the change */
@@ -268,17 +265,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 						    sechdrs, module);
 
 			/* Only replace bits 2 through 26 */
-			DEBUGP("REL24 value = %08X. location = %08X\n",
+			pr_debug("REL24 value = %08X. location = %08X\n",
 			       value, (uint32_t)location);
-			DEBUGP("Location before: %08X.\n",
+			pr_debug("Location before: %08X.\n",
 			       *(uint32_t *)location);
 			*(uint32_t *)location
 				= (*(uint32_t *)location & ~0x03fffffc)
 				| ((value - (uint32_t)location)
 				   & 0x03fffffc);
-			DEBUGP("Location after: %08X.\n",
+			pr_debug("Location after: %08X.\n",
 			       *(uint32_t *)location);
-			DEBUGP("ie. jump to %08X+%08X = %08X\n",
+			pr_debug("ie. jump to %08X+%08X = %08X\n",
 			       *(uint32_t *)location & 0x03fffffc,
 			       (uint32_t)location,
 			       (*(uint32_t *)location & 0x03fffffc)
@@ -291,7 +288,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 			break;
 
 		default:
-			printk("%s: unknown ADD relocation: %u\n",
+			pr_err("%s: unknown ADD relocation: %u\n",
 			       module->name,
 			       ELF32_R_TYPE(rela[i].r_info));
 			return -ENOEXEC;

+ 17 - 19
arch/powerpc/kernel/module_64.c

@@ -15,6 +15,9 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/elf.h>
 #include <linux/moduleloader.h>
@@ -36,11 +39,6 @@
    Using a magic allocator which places modules within 32MB solves
    this, and makes other things simpler.  Anton?
    --RR.  */
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
 #define R2_STACK_OFFSET 24
@@ -279,8 +277,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
 	/* Every relocated section... */
 	for (i = 1; i < hdr->e_shnum; i++) {
 		if (sechdrs[i].sh_type == SHT_RELA) {
-			DEBUGP("Found relocations in section %u\n", i);
-			DEBUGP("Ptr: %p.  Number: %lu\n",
+			pr_debug("Found relocations in section %u\n", i);
+			pr_debug("Ptr: %p.  Number: %Lu\n",
 			       (void *)sechdrs[i].sh_addr,
 			       sechdrs[i].sh_size / sizeof(Elf64_Rela));
 
@@ -304,7 +302,7 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
 	relocs++;
 #endif
 
-	DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+	pr_debug("Looks like a total of %lu stubs, max\n", relocs);
 	return relocs * sizeof(struct ppc64_stub_entry);
 }
 
@@ -390,7 +388,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 	}
 
 	if (!me->arch.stubs_section) {
-		printk("%s: doesn't contain .stubs.\n", me->name);
+		pr_err("%s: doesn't contain .stubs.\n", me->name);
 		return -ENOEXEC;
 	}
 
@@ -434,11 +432,11 @@ static inline int create_stub(Elf64_Shdr *sechdrs,
 	/* Stub uses address relative to r2. */
 	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
 	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
-		printk("%s: Address %p of stub out of range of %p.\n",
+		pr_err("%s: Address %p of stub out of range of %p.\n",
 		       me->name, (void *)reladdr, (void *)my_r2);
 		return 0;
 	}
-	DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
+	pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
 
 	entry->jump[0] |= PPC_HA(reladdr);
 	entry->jump[1] |= PPC_LO(reladdr);
@@ -477,7 +475,7 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
 static int restore_r2(u32 *instruction, struct module *me)
 {
 	if (*instruction != PPC_INST_NOP) {
-		printk("%s: Expect noop after relocate, got %08x\n",
+		pr_err("%s: Expect noop after relocate, got %08x\n",
 		       me->name, *instruction);
 		return 0;
 	}
@@ -498,7 +496,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	unsigned long *location;
 	unsigned long value;
 
-	DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+	pr_debug("Applying ADD relocate section %u to %u\n", relsec,
 	       sechdrs[relsec].sh_info);
 
 	/* First time we're called, we can fix up .TOC. */
@@ -519,7 +517,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
 			+ ELF64_R_SYM(rela[i].r_info);
 
-		DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+		pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n",
 		       location, (long)ELF64_R_TYPE(rela[i].r_info),
 		       strtab + sym->st_name, (unsigned long)sym->st_value,
 		       (long)rela[i].r_addend);
@@ -546,7 +544,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			/* Subtract TOC pointer */
 			value -= my_r2(sechdrs, me);
 			if (value + 0x8000 > 0xffff) {
-				printk("%s: bad TOC16 relocation (%lu)\n",
+				pr_err("%s: bad TOC16 relocation (0x%lx)\n",
 				       me->name, value);
 				return -ENOEXEC;
 			}
@@ -567,7 +565,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			/* Subtract TOC pointer */
 			value -= my_r2(sechdrs, me);
 			if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
-				printk("%s: bad TOC16_DS relocation (%lu)\n",
+				pr_err("%s: bad TOC16_DS relocation (0x%lx)\n",
 				       me->name, value);
 				return -ENOEXEC;
 			}
@@ -580,7 +578,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			/* Subtract TOC pointer */
 			value -= my_r2(sechdrs, me);
 			if ((value & 3) != 0) {
-				printk("%s: bad TOC16_LO_DS relocation (%lu)\n",
+				pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n",
 				       me->name, value);
 				return -ENOEXEC;
 			}
@@ -613,7 +611,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			/* Convert value to relative */
 			value -= (unsigned long)location;
 			if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
-				printk("%s: REL24 %li out of range!\n",
+				pr_err("%s: REL24 %li out of range!\n",
 				       me->name, (long int)value);
 				return -ENOEXEC;
 			}
@@ -655,7 +653,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			break;
 
 		default:
-			printk("%s: Unknown ADD relocation: %lu\n",
+			pr_err("%s: Unknown ADD relocation: %lu\n",
 			       me->name,
 			       (unsigned long)ELF64_R_TYPE(rela[i].r_info));
 			return -ENOEXEC;

+ 1 - 1
arch/powerpc/kernel/nvram_64.c

@@ -567,7 +567,7 @@ static int __init nvram_init(void)
   	return rc;
 }
 
-void __exit nvram_cleanup(void)
+static void __exit nvram_cleanup(void)
 {
         misc_deregister( &nvram_dev );
 }

+ 1 - 1
arch/powerpc/kernel/of_platform.c

@@ -97,7 +97,7 @@ static int of_pci_phb_probe(struct platform_device *dev)
 	return 0;
 }
 
-static struct of_device_id of_pci_phb_ids[] = {
+static const struct of_device_id of_pci_phb_ids[] = {
 	{ .type = "pci", },
 	{ .type = "pcix", },
 	{ .type = "pcie", },

+ 1 - 2
arch/powerpc/kernel/pci-common.c

@@ -1140,7 +1140,7 @@ static int reparent_resources(struct resource *parent,
  *	    as well.
  */
 
-void pcibios_allocate_bus_resources(struct pci_bus *bus)
+static void pcibios_allocate_bus_resources(struct pci_bus *bus)
 {
 	struct pci_bus *b;
 	int i;
@@ -1561,7 +1561,6 @@ EARLY_PCI_OP(write, byte, u8)
 EARLY_PCI_OP(write, word, u16)
 EARLY_PCI_OP(write, dword, u32)
 
-extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap);
 int early_find_capability(struct pci_controller *hose, int bus, int devfn,
 			  int cap)
 {

+ 1 - 1
arch/powerpc/kernel/pci_of_scan.c

@@ -38,7 +38,7 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
  * @addr0: value of 1st cell of a device tree PCI address.
  * @bridge: Set this flag if the address is from a bridge 'ranges' property
  */
-unsigned int pci_parse_of_flags(u32 addr0, int bridge)
+static unsigned int pci_parse_of_flags(u32 addr0, int bridge)
 {
 	unsigned int flags = 0;
 

+ 13 - 179
arch/powerpc/kernel/ppc_ksyms.c

@@ -1,207 +1,41 @@
-#include <linux/export.h>
-#include <linux/threads.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/elfcore.h>
-#include <linux/string.h>
-#include <linux/interrupt.h>
-#include <linux/screen_info.h>
-#include <linux/vt_kern.h>
-#include <linux/nvram.h>
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
+#include <linux/ftrace.h>
+#include <linux/mm.h>
 
-#include <asm/page.h>
 #include <asm/processor.h>
-#include <asm/cacheflush.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <linux/atomic.h>
-#include <asm/checksum.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <linux/adb.h>
-#include <linux/cuda.h>
-#include <linux/pmu.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <asm/pmac_feature.h>
-#include <asm/dma.h>
-#include <asm/machdep.h>
-#include <asm/hw_irq.h>
-#include <asm/nvram.h>
-#include <asm/mmu_context.h>
-#include <asm/backlight.h>
-#include <asm/time.h>
-#include <asm/cputable.h>
-#include <asm/btext.h>
-#include <asm/div64.h>
-#include <asm/signal.h>
-#include <asm/dcr.h>
-#include <asm/ftrace.h>
 #include <asm/switch_to.h>
+#include <asm/cacheflush.h>
 #include <asm/epapr_hcalls.h>
 
-#ifdef CONFIG_PPC32
-extern void transfer_to_handler(void);
-extern void do_IRQ(struct pt_regs *regs);
-extern void machine_check_exception(struct pt_regs *regs);
-extern void alignment_exception(struct pt_regs *regs);
-extern void program_check_exception(struct pt_regs *regs);
-extern void single_step_exception(struct pt_regs *regs);
-extern int sys_sigreturn(struct pt_regs *regs);
+EXPORT_SYMBOL(flush_dcache_range);
+EXPORT_SYMBOL(flush_icache_range);
 
-EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
-EXPORT_SYMBOL(DMA_MODE_READ);
-EXPORT_SYMBOL(DMA_MODE_WRITE);
+EXPORT_SYMBOL(empty_zero_page);
 
-EXPORT_SYMBOL(transfer_to_handler);
-EXPORT_SYMBOL(do_IRQ);
-EXPORT_SYMBOL(machine_check_exception);
-EXPORT_SYMBOL(alignment_exception);
-EXPORT_SYMBOL(program_check_exception);
-EXPORT_SYMBOL(single_step_exception);
-EXPORT_SYMBOL(sys_sigreturn);
-#endif
+long long __bswapdi2(long long);
+EXPORT_SYMBOL(__bswapdi2);
 
 #ifdef CONFIG_FUNCTION_TRACER
 EXPORT_SYMBOL(_mcount);
 #endif
 
-EXPORT_SYMBOL(strcpy);
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
-
-#ifndef CONFIG_GENERIC_CSUM
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-EXPORT_SYMBOL(ip_fast_csum);
-EXPORT_SYMBOL(csum_tcpudp_magic);
-#endif
-
-EXPORT_SYMBOL(__copy_tofrom_user);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(copy_page);
-
-#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(isa_io_base);
-EXPORT_SYMBOL(isa_mem_base);
-EXPORT_SYMBOL(pci_dram_offset);
-#endif /* CONFIG_PCI */
-
-EXPORT_SYMBOL(start_thread);
-
 #ifdef CONFIG_PPC_FPU
 EXPORT_SYMBOL(giveup_fpu);
 EXPORT_SYMBOL(load_fp_state);
 EXPORT_SYMBOL(store_fp_state);
 #endif
+
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL(giveup_altivec);
 EXPORT_SYMBOL(load_vr_state);
 EXPORT_SYMBOL(store_vr_state);
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL(giveup_vsx);
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-EXPORT_SYMBOL(giveup_spe);
-#endif /* CONFIG_SPE */
-
-#ifndef CONFIG_PPC64
-EXPORT_SYMBOL(flush_instruction_cache);
 #endif
-EXPORT_SYMBOL(flush_dcache_range);
-EXPORT_SYMBOL(flush_icache_range);
 
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(smp_hw_index);
-#endif
-#endif
-
-#ifdef CONFIG_ADB
-EXPORT_SYMBOL(adb_request);
-EXPORT_SYMBOL(adb_register);
-EXPORT_SYMBOL(adb_unregister);
-EXPORT_SYMBOL(adb_poll);
-EXPORT_SYMBOL(adb_try_handler_change);
-#endif /* CONFIG_ADB */
-#ifdef CONFIG_ADB_CUDA
-EXPORT_SYMBOL(cuda_request);
-EXPORT_SYMBOL(cuda_poll);
-#endif /* CONFIG_ADB_CUDA */
-EXPORT_SYMBOL(to_tm);
-
-#ifdef CONFIG_PPC32
-long long __ashrdi3(long long, int);
-long long __ashldi3(long long, int);
-long long __lshrdi3(long long, int);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__lshrdi3);
-int __ucmpdi2(unsigned long long, unsigned long long);
-EXPORT_SYMBOL(__ucmpdi2);
-int __cmpdi2(long long, long long);
-EXPORT_SYMBOL(__cmpdi2);
-#endif
-long long __bswapdi2(long long);
-EXPORT_SYMBOL(__bswapdi2);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memchr);
-
-#if defined(CONFIG_FB_VGA16_MODULE)
-EXPORT_SYMBOL(screen_info);
-#endif
-
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(tb_ticks_per_jiffy);
-EXPORT_SYMBOL(cacheable_memcpy);
-EXPORT_SYMBOL(cacheable_memzero);
-#endif
-
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(switch_mmu_context);
-#endif
-
-#ifdef CONFIG_PPC_STD_MMU_32
-extern long mol_trampoline;
-EXPORT_SYMBOL(mol_trampoline); /* For MOL */
-EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
-#ifdef CONFIG_SMP
-extern int mmu_hash_lock;
-EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
-#endif /* CONFIG_SMP */
-extern long *intercept_table;
-EXPORT_SYMBOL(intercept_table);
-#endif /* CONFIG_PPC_STD_MMU_32 */
-#ifdef CONFIG_PPC_DCR_NATIVE
-EXPORT_SYMBOL(__mtdcr);
-EXPORT_SYMBOL(__mfdcr);
-#endif
-EXPORT_SYMBOL(empty_zero_page);
-
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(__arch_hweight8);
-EXPORT_SYMBOL(__arch_hweight16);
-EXPORT_SYMBOL(__arch_hweight32);
-EXPORT_SYMBOL(__arch_hweight64);
+#ifdef CONFIG_VSX
+EXPORT_SYMBOL_GPL(__giveup_vsx);
 #endif
 
-#ifdef CONFIG_PPC_BOOK3S_64
-EXPORT_SYMBOL_GPL(mmu_psize_defs);
+#ifdef CONFIG_SPE
+EXPORT_SYMBOL(giveup_spe);
 #endif
 
 #ifdef CONFIG_EPAPR_PARAVIRT

+ 61 - 0
arch/powerpc/kernel/ppc_ksyms_32.c

@@ -0,0 +1,61 @@
+#include <linux/export.h>
+#include <linux/smp.h>
+
+#include <asm/page.h>
+#include <asm/dma.h>
+#include <asm/io.h>
+#include <asm/hw_irq.h>
+#include <asm/time.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/dcr.h>
+
+EXPORT_SYMBOL(clear_pages);
+EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
+EXPORT_SYMBOL(DMA_MODE_READ);
+EXPORT_SYMBOL(DMA_MODE_WRITE);
+
+#if defined(CONFIG_PCI)
+EXPORT_SYMBOL(isa_io_base);
+EXPORT_SYMBOL(isa_mem_base);
+EXPORT_SYMBOL(pci_dram_offset);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(smp_hw_index);
+#endif
+
+long long __ashrdi3(long long, int);
+long long __ashldi3(long long, int);
+long long __lshrdi3(long long, int);
+int __ucmpdi2(unsigned long long, unsigned long long);
+int __cmpdi2(long long, long long);
+EXPORT_SYMBOL(__ashrdi3);
+EXPORT_SYMBOL(__ashldi3);
+EXPORT_SYMBOL(__lshrdi3);
+EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__cmpdi2);
+
+EXPORT_SYMBOL(timer_interrupt);
+EXPORT_SYMBOL(tb_ticks_per_jiffy);
+
+EXPORT_SYMBOL(switch_mmu_context);
+
+#ifdef CONFIG_PPC_STD_MMU_32
+extern long mol_trampoline;
+EXPORT_SYMBOL(mol_trampoline); /* For MOL */
+EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
+#ifdef CONFIG_SMP
+extern int mmu_hash_lock;
+EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
+#endif /* CONFIG_SMP */
+extern long *intercept_table;
+EXPORT_SYMBOL(intercept_table);
+#endif /* CONFIG_PPC_STD_MMU_32 */
+
+#ifdef CONFIG_PPC_DCR_NATIVE
+EXPORT_SYMBOL(__mtdcr);
+EXPORT_SYMBOL(__mfdcr);
+#endif
+
+EXPORT_SYMBOL(flush_instruction_cache);

+ 2 - 0
arch/powerpc/kernel/process.c

@@ -228,6 +228,7 @@ void giveup_vsx(struct task_struct *tsk)
 	giveup_altivec_maybe_transactional(tsk);
 	__giveup_vsx(tsk);
 }
+EXPORT_SYMBOL(giveup_vsx);
 
 void flush_vsx_to_thread(struct task_struct *tsk)
 {
@@ -1316,6 +1317,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.tm_tfiar = 0;
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 }
+EXPORT_SYMBOL(start_thread);
 
 #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
 		| PR_FP_EXC_RES | PR_FP_EXC_INV)

+ 8 - 5
arch/powerpc/kernel/prom.c

@@ -386,8 +386,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	return 0;
 }
 
-int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
-					 int depth, void *data)
+static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
+						const char *uname,
+						int depth, void *data)
 {
 	const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
 
@@ -641,6 +642,10 @@ void __init early_init_devtree(void *params)
 
 	DBG(" -> early_init_devtree(%p)\n", params);
 
+	/* Too early to BUG_ON(), do it by hand */
+	if (!early_init_dt_verify(params))
+		panic("BUG: Failed verifying flat device tree, bad version?");
+
 	/* Setup flat device-tree pointer */
 	initial_boot_params = params;
 
@@ -663,14 +668,12 @@ void __init early_init_devtree(void *params)
 	 * device-tree, including the platform type, initrd location and
 	 * size, TCE reserve, and more ...
 	 */
-	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
+	of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
 
 	/* Scan memory nodes and rebuild MEMBLOCKs */
 	of_scan_flat_dt(early_init_dt_scan_root, NULL);
 	of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
 
-	/* Save command line for /proc/cmdline and then parse parameters */
-	strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
 	parse_early_param();
 
 	/* make sure we've parsed cmdline for mem= before this */

+ 6 - 16
arch/powerpc/kernel/prom_init_check.sh

@@ -50,24 +50,14 @@ do
 	done
 
 	# ignore register save/restore funcitons
-	if [ "${UNDEF:0:9}" = "_restgpr_" ]; then
+	case $UNDEF in
+	_restgpr_*|_restgpr0_*|_rest32gpr_*)
 		OK=1
-	fi
-	if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then
-		OK=1
-	fi
-	if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then
-		OK=1
-	fi
-	if [ "${UNDEF:0:9}" = "_savegpr_" ]; then
+		;;
+	_savegpr_*|_savegpr0_*|_save32gpr_*)
 		OK=1
-	fi
-	if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then
-		OK=1
-	fi
-	if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then
-		OK=1
-	fi
+		;;
+	esac
 
 	if [ $OK -eq 0 ]; then
 		ERROR=1

+ 1 - 1
arch/powerpc/kernel/ptrace.c

@@ -932,7 +932,7 @@ void ptrace_triggered(struct perf_event *bp,
 }
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
-int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
+static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 			       unsigned long data)
 {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT

+ 1 - 1
arch/powerpc/kernel/rtasd.c

@@ -286,7 +286,7 @@ static void prrn_work_fn(struct work_struct *work)
 
 static DECLARE_WORK(prrn_work, prrn_work_fn);
 
-void prrn_schedule_update(u32 scope)
+static void prrn_schedule_update(u32 scope)
 {
 	flush_work(&prrn_work);
 	prrn_update_scope = scope;

+ 4 - 3
arch/powerpc/kernel/setup-common.c

@@ -81,8 +81,6 @@ EXPORT_SYMBOL_GPL(boot_cpuid);
 
 unsigned long klimit = (unsigned long) _end;
 
-char cmd_line[COMMAND_LINE_SIZE];
-
 /*
  * This still seems to be needed... -- paulus
  */ 
@@ -94,6 +92,9 @@ struct screen_info screen_info = {
 	.orig_video_isVGA = 1,
 	.orig_video_points = 16
 };
+#if defined(CONFIG_FB_VGA16_MODULE)
+EXPORT_SYMBOL(screen_info);
+#endif
 
 /* Variables required to store legacy IO irq routing */
 int of_i8042_kbd_irq;
@@ -382,7 +383,7 @@ void __init check_for_initrd(void)
 		initrd_start = initrd_end = 0;
 
 	if (initrd_start)
-		printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+		pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
 
 	DBG(" <- check_for_initrd()\n");
 #endif /* CONFIG_BLK_DEV_INITRD */

+ 1 - 1
arch/powerpc/kernel/setup_32.c

@@ -268,7 +268,7 @@ static void __init exc_lvl_early_init(void)
 /* Warning, IO base is not yet inited */
 void __init setup_arch(char **cmdline_p)
 {
-	*cmdline_p = cmd_line;
+	*cmdline_p = boot_command_line;
 
 	/* so udelay does something sensible, assume <= 1000 bogomips */
 	loops_per_jiffy = 500000000 / HZ;

+ 21 - 11
arch/powerpc/kernel/setup_64.c

@@ -525,21 +525,31 @@ void __init setup_system(void)
 	printk("Starting Linux PPC64 %s\n", init_utsname()->version);
 
 	printk("-----------------------------------------------------\n");
-	printk("ppc64_pft_size                = 0x%llx\n", ppc64_pft_size);
-	printk("physicalMemorySize            = 0x%llx\n", memblock_phys_mem_size());
+	printk("ppc64_pft_size    = 0x%llx\n", ppc64_pft_size);
+	printk("phys_mem_size     = 0x%llx\n", memblock_phys_mem_size());
+
 	if (ppc64_caches.dline_size != 0x80)
-		printk("ppc64_caches.dcache_line_size = 0x%x\n",
-		       ppc64_caches.dline_size);
+		printk("dcache_line_size  = 0x%x\n", ppc64_caches.dline_size);
 	if (ppc64_caches.iline_size != 0x80)
-		printk("ppc64_caches.icache_line_size = 0x%x\n",
-		       ppc64_caches.iline_size);
+		printk("icache_line_size  = 0x%x\n", ppc64_caches.iline_size);
+
+	printk("cpu_features      = 0x%016lx\n", cur_cpu_spec->cpu_features);
+	printk("  possible        = 0x%016lx\n", CPU_FTRS_POSSIBLE);
+	printk("  always          = 0x%016lx\n", CPU_FTRS_ALWAYS);
+	printk("cpu_user_features = 0x%08x 0x%08x\n", cur_cpu_spec->cpu_user_features,
+		cur_cpu_spec->cpu_user_features2);
+	printk("mmu_features      = 0x%08x\n", cur_cpu_spec->mmu_features);
+	printk("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+
 #ifdef CONFIG_PPC_STD_MMU_64
 	if (htab_address)
-		printk("htab_address                  = 0x%p\n", htab_address);
-	printk("htab_hash_mask                = 0x%lx\n", htab_hash_mask);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+		printk("htab_address      = 0x%p\n", htab_address);
+
+	printk("htab_hash_mask    = 0x%lx\n", htab_hash_mask);
+#endif
+
 	if (PHYSICAL_START > 0)
-		printk("physical_start                = 0x%llx\n",
+		printk("physical_start    = 0x%llx\n",
 		       (unsigned long long)PHYSICAL_START);
 	printk("-----------------------------------------------------\n");
 
@@ -657,7 +667,7 @@ void __init setup_arch(char **cmdline_p)
 {
 	ppc64_boot_msg(0x12, "Setup Arch");
 
-	*cmdline_p = cmd_line;
+	*cmdline_p = boot_command_line;
 
 	/*
 	 * Set cache line size based on type of cpu as a default.

+ 9 - 2
arch/powerpc/kernel/smp.c

@@ -52,6 +52,7 @@
 #endif
 #include <asm/vdso.h>
 #include <asm/debug.h>
+#include <asm/kexec.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -379,8 +380,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		/*
 		 * numa_node_id() works after this.
 		 */
-		set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
-		set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
+		if (cpu_present(cpu)) {
+			set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+			set_cpu_numa_mem(cpu,
+				local_memory_node(numa_cpu_lookup_table[cpu]));
+		}
 	}
 
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -728,6 +732,9 @@ void start_secondary(void *unused)
 	}
 	traverse_core_siblings(cpu, true);
 
+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
 	smp_wmb();
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);

+ 3 - 2
arch/powerpc/kernel/time.c

@@ -479,7 +479,7 @@ void arch_irq_work_raise(void)
 
 #endif /* CONFIG_IRQ_WORK */
 
-void __timer_interrupt(void)
+static void __timer_interrupt(void)
 {
 	struct pt_regs *regs = get_irq_regs();
 	u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
@@ -643,7 +643,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
 	return found;
 }
 
-void start_cpu_decrementer(void)
+static void start_cpu_decrementer(void)
 {
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 	/* Clear any pending timer interrupts */
@@ -1024,6 +1024,7 @@ void to_tm(int tim, struct rtc_time * tm)
 	 */
 	GregorianDay(tm);
 }
+EXPORT_SYMBOL(to_tm);
 
 /*
  * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit

+ 1 - 1
arch/powerpc/lib/Makefile

@@ -10,7 +10,7 @@ CFLAGS_REMOVE_code-patching.o = -pg
 CFLAGS_REMOVE_feature-fixups.o = -pg
 
 obj-y			:= string.o alloc.o \
-			   crtsavres.o
+			   crtsavres.o ppc_ksyms.o
 obj-$(CONFIG_PPC32)	+= div64.o copy_32.o
 obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 

+ 1 - 1
arch/powerpc/lib/feature-fixups.c

@@ -164,7 +164,7 @@ static long calc_offset(struct fixup_entry *entry, unsigned int *p)
 	return (unsigned long)p - (unsigned long)entry;
 }
 
-void test_basic_patching(void)
+static void test_basic_patching(void)
 {
 	extern unsigned int ftr_fixup_test1;
 	extern unsigned int end_ftr_fixup_test1;

+ 39 - 0
arch/powerpc/lib/ppc_ksyms.c

@@ -0,0 +1,39 @@
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/bitops.h>
+#include <net/checksum.h>
+
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+EXPORT_SYMBOL(memcmp);
+EXPORT_SYMBOL(memchr);
+#ifdef CONFIG_PPC32
+EXPORT_SYMBOL(cacheable_memcpy);
+EXPORT_SYMBOL(cacheable_memzero);
+#endif
+
+EXPORT_SYMBOL(strcpy);
+EXPORT_SYMBOL(strncpy);
+EXPORT_SYMBOL(strcat);
+EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
+
+#ifndef CONFIG_GENERIC_CSUM
+EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_generic);
+EXPORT_SYMBOL(ip_fast_csum);
+EXPORT_SYMBOL(csum_tcpudp_magic);
+#endif
+
+EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_PPC64
+EXPORT_SYMBOL(__arch_hweight8);
+EXPORT_SYMBOL(__arch_hweight16);
+EXPORT_SYMBOL(__arch_hweight32);
+EXPORT_SYMBOL(__arch_hweight64);
+#endif

+ 636 - 360
arch/powerpc/lib/sstep.c

@@ -98,13 +98,8 @@ static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs
 
 	ra = (instr >> 16) & 0x1f;
 	ea = (signed short) instr;		/* sign-extend */
-	if (ra) {
+	if (ra)
 		ea += regs->gpr[ra];
-		if (instr & 0x04000000) {		/* update forms */
-			if ((instr>>26) != 47) 		/* stmw is not an update form */
-				regs->gpr[ra] = ea;
-		}
-	}
 
 	return truncate_if_32bit(regs->msr, ea);
 }
@@ -120,11 +115,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
 
 	ra = (instr >> 16) & 0x1f;
 	ea = (signed short) (instr & ~3);	/* sign-extend */
-	if (ra) {
+	if (ra)
 		ea += regs->gpr[ra];
-		if ((instr & 3) == 1)		/* update forms */
-			regs->gpr[ra] = ea;
-	}
 
 	return truncate_if_32bit(regs->msr, ea);
 }
@@ -133,8 +125,8 @@ static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *reg
 /*
  * Calculate effective address for an X-form instruction
  */
-static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs,
-				     int do_update)
+static unsigned long __kprobes xform_ea(unsigned int instr,
+					struct pt_regs *regs)
 {
 	int ra, rb;
 	unsigned long ea;
@@ -142,11 +134,8 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
 	ra = (instr >> 16) & 0x1f;
 	rb = (instr >> 11) & 0x1f;
 	ea = regs->gpr[rb];
-	if (ra) {
+	if (ra)
 		ea += regs->gpr[ra];
-		if (do_update)		/* update forms */
-			regs->gpr[ra] = ea;
-	}
 
 	return truncate_if_32bit(regs->msr, ea);
 }
@@ -611,6 +600,23 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
 	regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
 }
 
+static int __kprobes trap_compare(long v1, long v2)
+{
+	int ret = 0;
+
+	if (v1 < v2)
+		ret |= 0x10;
+	else if (v1 > v2)
+		ret |= 0x08;
+	else
+		ret |= 0x04;
+	if ((unsigned long)v1 < (unsigned long)v2)
+		ret |= 0x02;
+	else if ((unsigned long)v1 > (unsigned long)v2)
+		ret |= 0x01;
+	return ret;
+}
+
 /*
  * Elements of 32-bit rotate and mask instructions.
  */
@@ -627,26 +633,27 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
 #define ROTATE(x, n)	((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
 
 /*
- * Emulate instructions that cause a transfer of control,
- * loads and stores, and a few other instructions.
- * Returns 1 if the step was emulated, 0 if not,
- * or -1 if the instruction is one that should not be stepped,
- * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ * Decode an instruction, and execute it if that can be done just by
+ * modifying *regs (i.e. integer arithmetic and logical instructions,
+ * branches, and barrier instructions).
+ * Returns 1 if the instruction has been executed, or 0 if not.
+ * Sets *op to indicate what the instruction does.
  */
-int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
+int __kprobes analyse_instr(struct instruction_op *op, struct pt_regs *regs,
+			    unsigned int instr)
 {
 	unsigned int opcode, ra, rb, rd, spr, u;
 	unsigned long int imm;
 	unsigned long int val, val2;
-	unsigned long int ea;
-	unsigned int cr, mb, me, sh;
-	int err;
-	unsigned long old_ra, val3;
+	unsigned int mb, me, sh;
 	long ival;
 
+	op->type = COMPUTE;
+
 	opcode = instr >> 26;
 	switch (opcode) {
 	case 16:	/* bc */
+		op->type = BRANCH;
 		imm = (signed short)(instr & 0xfffc);
 		if ((instr & 2) == 0)
 			imm += regs->nip;
@@ -659,26 +666,14 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 1;
 #ifdef CONFIG_PPC64
 	case 17:	/* sc */
-		/*
-		 * N.B. this uses knowledge about how the syscall
-		 * entry code works.  If that is changed, this will
-		 * need to be changed also.
-		 */
-		if (regs->gpr[0] == 0x1ebe &&
-		    cpu_has_feature(CPU_FTR_REAL_LE)) {
-			regs->msr ^= MSR_LE;
-			goto instr_done;
-		}
-		regs->gpr[9] = regs->gpr[13];
-		regs->gpr[10] = MSR_KERNEL;
-		regs->gpr[11] = regs->nip + 4;
-		regs->gpr[12] = regs->msr & MSR_MASK;
-		regs->gpr[13] = (unsigned long) get_paca();
-		regs->nip = (unsigned long) &system_call_common;
-		regs->msr = MSR_KERNEL;
-		return 1;
+		if ((instr & 0xfe2) == 2)
+			op->type = SYSCALL;
+		else
+			op->type = UNKNOWN;
+		return 0;
 #endif
 	case 18:	/* b */
+		op->type = BRANCH;
 		imm = instr & 0x03fffffc;
 		if (imm & 0x02000000)
 			imm -= 0x04000000;
@@ -691,8 +686,16 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		return 1;
 	case 19:
 		switch ((instr >> 1) & 0x3ff) {
+		case 0:		/* mcrf */
+			rd = (instr >> 21) & 0x1c;
+			ra = (instr >> 16) & 0x1c;
+			val = (regs->ccr >> ra) & 0xf;
+			regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+			goto instr_done;
+
 		case 16:	/* bclr */
 		case 528:	/* bcctr */
+			op->type = BRANCH;
 			imm = (instr & 0x400)? regs->ctr: regs->link;
 			regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
 			imm = truncate_if_32bit(regs->msr, imm);
@@ -703,9 +706,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			return 1;
 
 		case 18:	/* rfid, scary */
-			return -1;
+			if (regs->msr & MSR_PR)
+				goto priv;
+			op->type = RFI;
+			return 0;
 
 		case 150:	/* isync */
+			op->type = BARRIER;
 			isync();
 			goto instr_done;
 
@@ -731,6 +738,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 	case 31:
 		switch ((instr >> 1) & 0x3ff) {
 		case 598:	/* sync */
+			op->type = BARRIER;
 #ifdef __powerpc64__
 			switch ((instr >> 21) & 3) {
 			case 1:		/* lwsync */
@@ -745,6 +753,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			goto instr_done;
 
 		case 854:	/* eieio */
+			op->type = BARRIER;
 			eieio();
 			goto instr_done;
 		}
@@ -760,6 +769,17 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 	rb = (instr >> 11) & 0x1f;
 
 	switch (opcode) {
+#ifdef __powerpc64__
+	case 2:		/* tdi */
+		if (rd & trap_compare(regs->gpr[ra], (short) instr))
+			goto trap;
+		goto instr_done;
+#endif
+	case 3:		/* twi */
+		if (rd & trap_compare((int)regs->gpr[ra], (short) instr))
+			goto trap;
+		goto instr_done;
+
 	case 7:		/* mulli */
 		regs->gpr[rd] = regs->gpr[ra] * (short) instr;
 		goto instr_done;
@@ -908,35 +928,44 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 	case 31:
 		switch ((instr >> 1) & 0x3ff) {
+		case 4:		/* tw */
+			if (rd == 0x1f ||
+			    (rd & trap_compare((int)regs->gpr[ra],
+					       (int)regs->gpr[rb])))
+				goto trap;
+			goto instr_done;
+#ifdef __powerpc64__
+		case 68:	/* td */
+			if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
+				goto trap;
+			goto instr_done;
+#endif
 		case 83:	/* mfmsr */
 			if (regs->msr & MSR_PR)
-				break;
-			regs->gpr[rd] = regs->msr & MSR_MASK;
-			goto instr_done;
+				goto priv;
+			op->type = MFMSR;
+			op->reg = rd;
+			return 0;
 		case 146:	/* mtmsr */
 			if (regs->msr & MSR_PR)
-				break;
-			imm = regs->gpr[rd];
-			if ((imm & MSR_RI) == 0)
-				/* can't step mtmsr that would clear MSR_RI */
-				return -1;
-			regs->msr = imm;
-			goto instr_done;
+				goto priv;
+			op->type = MTMSR;
+			op->reg = rd;
+			op->val = 0xffffffff & ~(MSR_ME | MSR_LE);
+			return 0;
 #ifdef CONFIG_PPC64
 		case 178:	/* mtmsrd */
-			/* only MSR_EE and MSR_RI get changed if bit 15 set */
-			/* mtmsrd doesn't change MSR_HV and MSR_ME */
 			if (regs->msr & MSR_PR)
-				break;
-			imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
-			imm = (regs->msr & MSR_MASK & ~imm)
-				| (regs->gpr[rd] & imm);
-			if ((imm & MSR_RI) == 0)
-				/* can't step mtmsrd that would clear MSR_RI */
-				return -1;
-			regs->msr = imm;
-			goto instr_done;
+				goto priv;
+			op->type = MTMSR;
+			op->reg = rd;
+			/* only MSR_EE and MSR_RI get changed if bit 15 set */
+			/* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */
+			imm = (instr & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
+			op->val = imm;
+			return 0;
 #endif
+
 		case 19:	/* mfcr */
 			regs->gpr[rd] = regs->ccr;
 			regs->gpr[rd] &= 0xffffffffUL;
@@ -954,33 +983,43 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			goto instr_done;
 
 		case 339:	/* mfspr */
-			spr = (instr >> 11) & 0x3ff;
+			spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
 			switch (spr) {
-			case 0x20:	/* mfxer */
+			case SPRN_XER:	/* mfxer */
 				regs->gpr[rd] = regs->xer;
 				regs->gpr[rd] &= 0xffffffffUL;
 				goto instr_done;
-			case 0x100:	/* mflr */
+			case SPRN_LR:	/* mflr */
 				regs->gpr[rd] = regs->link;
 				goto instr_done;
-			case 0x120:	/* mfctr */
+			case SPRN_CTR:	/* mfctr */
 				regs->gpr[rd] = regs->ctr;
 				goto instr_done;
+			default:
+				op->type = MFSPR;
+				op->reg = rd;
+				op->spr = spr;
+				return 0;
 			}
 			break;
 
 		case 467:	/* mtspr */
-			spr = (instr >> 11) & 0x3ff;
+			spr = ((instr >> 16) & 0x1f) | ((instr >> 6) & 0x3e0);
 			switch (spr) {
-			case 0x20:	/* mtxer */
+			case SPRN_XER:	/* mtxer */
 				regs->xer = (regs->gpr[rd] & 0xffffffffUL);
 				goto instr_done;
-			case 0x100:	/* mtlr */
+			case SPRN_LR:	/* mtlr */
 				regs->link = regs->gpr[rd];
 				goto instr_done;
-			case 0x120:	/* mtctr */
+			case SPRN_CTR:	/* mtctr */
 				regs->ctr = regs->gpr[rd];
 				goto instr_done;
+			default:
+				op->type = MTSPR;
+				op->val = regs->gpr[rd];
+				op->spr = spr;
+				return 0;
 			}
 			break;
 
@@ -1257,294 +1296,242 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  * Cache instructions
  */
 		case 54:	/* dcbst */
-			ea = xform_ea(instr, regs, 0);
-			if (!address_ok(regs, ea, 8))
-				return 0;
-			err = 0;
-			__cacheop_user_asmx(ea, err, "dcbst");
-			if (err)
-				return 0;
-			goto instr_done;
+			op->type = MKOP(CACHEOP, DCBST, 0);
+			op->ea = xform_ea(instr, regs);
+			return 0;
 
 		case 86:	/* dcbf */
-			ea = xform_ea(instr, regs, 0);
-			if (!address_ok(regs, ea, 8))
-				return 0;
-			err = 0;
-			__cacheop_user_asmx(ea, err, "dcbf");
-			if (err)
-				return 0;
-			goto instr_done;
+			op->type = MKOP(CACHEOP, DCBF, 0);
+			op->ea = xform_ea(instr, regs);
+			return 0;
 
 		case 246:	/* dcbtst */
-			if (rd == 0) {
-				ea = xform_ea(instr, regs, 0);
-				prefetchw((void *) ea);
-			}
-			goto instr_done;
+			op->type = MKOP(CACHEOP, DCBTST, 0);
+			op->ea = xform_ea(instr, regs);
+			op->reg = rd;
+			return 0;
 
 		case 278:	/* dcbt */
-			if (rd == 0) {
-				ea = xform_ea(instr, regs, 0);
-				prefetch((void *) ea);
-			}
-			goto instr_done;
+			op->type = MKOP(CACHEOP, DCBTST, 0);
+			op->ea = xform_ea(instr, regs);
+			op->reg = rd;
+			return 0;
 
+		case 982:	/* icbi */
+			op->type = MKOP(CACHEOP, ICBI, 0);
+			op->ea = xform_ea(instr, regs);
+			return 0;
 		}
 		break;
 	}
 
 	/*
-	 * Following cases are for loads and stores, so bail out
-	 * if we're in little-endian mode.
+	 * Loads and stores.
 	 */
-	if (regs->msr & MSR_LE)
-		return 0;
-
-	/*
-	 * Save register RA in case it's an update form load or store
-	 * and the access faults.
-	 */
-	old_ra = regs->gpr[ra];
+	op->type = UNKNOWN;
+	op->update_reg = ra;
+	op->reg = rd;
+	op->val = regs->gpr[rd];
+	u = (instr >> 20) & UPDATE;
 
 	switch (opcode) {
 	case 31:
-		u = instr & 0x40;
+		u = instr & UPDATE;
+		op->ea = xform_ea(instr, regs);
 		switch ((instr >> 1) & 0x3ff) {
 		case 20:	/* lwarx */
-			ea = xform_ea(instr, regs, 0);
-			if (ea & 3)
-				break;		/* can't handle misaligned */
-			err = -EFAULT;
-			if (!address_ok(regs, ea, 4))
-				goto ldst_done;
-			err = 0;
-			__get_user_asmx(val, ea, err, "lwarx");
-			if (!err)
-				regs->gpr[rd] = val;
-			goto ldst_done;
+			op->type = MKOP(LARX, 0, 4);
+			break;
 
 		case 150:	/* stwcx. */
-			ea = xform_ea(instr, regs, 0);
-			if (ea & 3)
-				break;		/* can't handle misaligned */
-			err = -EFAULT;
-			if (!address_ok(regs, ea, 4))
-				goto ldst_done;
-			err = 0;
-			__put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr);
-			if (!err)
-				regs->ccr = (regs->ccr & 0x0fffffff) |
-					(cr & 0xe0000000) |
-					((regs->xer >> 3) & 0x10000000);
-			goto ldst_done;
+			op->type = MKOP(STCX, 0, 4);
+			break;
 
 #ifdef __powerpc64__
 		case 84:	/* ldarx */
-			ea = xform_ea(instr, regs, 0);
-			if (ea & 7)
-				break;		/* can't handle misaligned */
-			err = -EFAULT;
-			if (!address_ok(regs, ea, 8))
-				goto ldst_done;
-			err = 0;
-			__get_user_asmx(val, ea, err, "ldarx");
-			if (!err)
-				regs->gpr[rd] = val;
-			goto ldst_done;
+			op->type = MKOP(LARX, 0, 8);
+			break;
 
 		case 214:	/* stdcx. */
-			ea = xform_ea(instr, regs, 0);
-			if (ea & 7)
-				break;		/* can't handle misaligned */
-			err = -EFAULT;
-			if (!address_ok(regs, ea, 8))
-				goto ldst_done;
-			err = 0;
-			__put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr);
-			if (!err)
-				regs->ccr = (regs->ccr & 0x0fffffff) |
-					(cr & 0xe0000000) |
-					((regs->xer >> 3) & 0x10000000);
-			goto ldst_done;
+			op->type = MKOP(STCX, 0, 8);
+			break;
 
 		case 21:	/* ldx */
 		case 53:	/* ldux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       8, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, u, 8);
+			break;
 #endif
 
 		case 23:	/* lwzx */
 		case 55:	/* lwzux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       4, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, u, 4);
+			break;
 
 		case 87:	/* lbzx */
 		case 119:	/* lbzux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       1, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, u, 1);
+			break;
 
 #ifdef CONFIG_ALTIVEC
 		case 103:	/* lvx */
 		case 359:	/* lvxl */
 			if (!(regs->msr & MSR_VEC))
-				break;
-			ea = xform_ea(instr, regs, 0);
-			err = do_vec_load(rd, do_lvx, ea, regs);
-			goto ldst_done;
+				goto vecunavail;
+			op->type = MKOP(LOAD_VMX, 0, 16);
+			break;
 
 		case 231:	/* stvx */
 		case 487:	/* stvxl */
 			if (!(regs->msr & MSR_VEC))
-				break;
-			ea = xform_ea(instr, regs, 0);
-			err = do_vec_store(rd, do_stvx, ea, regs);
-			goto ldst_done;
+				goto vecunavail;
+			op->type = MKOP(STORE_VMX, 0, 16);
+			break;
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef __powerpc64__
 		case 149:	/* stdx */
 		case 181:	/* stdux */
-			val = regs->gpr[rd];
-			err = write_mem(val, xform_ea(instr, regs, u), 8, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, u, 8);
+			break;
 #endif
 
 		case 151:	/* stwx */
 		case 183:	/* stwux */
-			val = regs->gpr[rd];
-			err = write_mem(val, xform_ea(instr, regs, u), 4, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, u, 4);
+			break;
 
 		case 215:	/* stbx */
 		case 247:	/* stbux */
-			val = regs->gpr[rd];
-			err = write_mem(val, xform_ea(instr, regs, u), 1, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, u, 1);
+			break;
 
 		case 279:	/* lhzx */
 		case 311:	/* lhzux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       2, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, u, 2);
+			break;
 
 #ifdef __powerpc64__
 		case 341:	/* lwax */
 		case 373:	/* lwaux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       4, regs);
-			if (!err)
-				regs->gpr[rd] = (signed int) regs->gpr[rd];
-			goto ldst_done;
+			op->type = MKOP(LOAD, SIGNEXT | u, 4);
+			break;
 #endif
 
 		case 343:	/* lhax */
 		case 375:	/* lhaux */
-			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
-				       2, regs);
-			if (!err)
-				regs->gpr[rd] = (signed short) regs->gpr[rd];
-			goto ldst_done;
+			op->type = MKOP(LOAD, SIGNEXT | u, 2);
+			break;
 
 		case 407:	/* sthx */
 		case 439:	/* sthux */
-			val = regs->gpr[rd];
-			err = write_mem(val, xform_ea(instr, regs, u), 2, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, u, 2);
+			break;
 
 #ifdef __powerpc64__
 		case 532:	/* ldbrx */
-			err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs);
-			if (!err)
-				regs->gpr[rd] = byterev_8(val);
-			goto ldst_done;
+			op->type = MKOP(LOAD, BYTEREV, 8);
+			break;
 
 #endif
+		case 533:	/* lswx */
+			op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f);
+			break;
 
 		case 534:	/* lwbrx */
-			err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs);
-			if (!err)
-				regs->gpr[rd] = byterev_4(val);
-			goto ldst_done;
+			op->type = MKOP(LOAD, BYTEREV, 4);
+			break;
+
+		case 597:	/* lswi */
+			if (rb == 0)
+				rb = 32;	/* # bytes to load */
+			op->type = MKOP(LOAD_MULTI, 0, rb);
+			op->ea = 0;
+			if (ra)
+				op->ea = truncate_if_32bit(regs->msr,
+							   regs->gpr[ra]);
+			break;
 
 #ifdef CONFIG_PPC_FPU
 		case 535:	/* lfsx */
 		case 567:	/* lfsux */
 			if (!(regs->msr & MSR_FP))
-				break;
-			ea = xform_ea(instr, regs, u);
-			err = do_fp_load(rd, do_lfs, ea, 4, regs);
-			goto ldst_done;
+				goto fpunavail;
+			op->type = MKOP(LOAD_FP, u, 4);
+			break;
 
 		case 599:	/* lfdx */
 		case 631:	/* lfdux */
 			if (!(regs->msr & MSR_FP))
-				break;
-			ea = xform_ea(instr, regs, u);
-			err = do_fp_load(rd, do_lfd, ea, 8, regs);
-			goto ldst_done;
+				goto fpunavail;
+			op->type = MKOP(LOAD_FP, u, 8);
+			break;
 
 		case 663:	/* stfsx */
 		case 695:	/* stfsux */
 			if (!(regs->msr & MSR_FP))
-				break;
-			ea = xform_ea(instr, regs, u);
-			err = do_fp_store(rd, do_stfs, ea, 4, regs);
-			goto ldst_done;
+				goto fpunavail;
+			op->type = MKOP(STORE_FP, u, 4);
+			break;
 
 		case 727:	/* stfdx */
 		case 759:	/* stfdux */
 			if (!(regs->msr & MSR_FP))
-				break;
-			ea = xform_ea(instr, regs, u);
-			err = do_fp_store(rd, do_stfd, ea, 8, regs);
-			goto ldst_done;
+				goto fpunavail;
+			op->type = MKOP(STORE_FP, u, 8);
+			break;
 #endif
 
 #ifdef __powerpc64__
 		case 660:	/* stdbrx */
-			val = byterev_8(regs->gpr[rd]);
-			err = write_mem(val, xform_ea(instr, regs, 0), 8, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, BYTEREV, 8);
+			op->val = byterev_8(regs->gpr[rd]);
+			break;
 
 #endif
+		case 661:	/* stswx */
+			op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f);
+			break;
+
 		case 662:	/* stwbrx */
-			val = byterev_4(regs->gpr[rd]);
-			err = write_mem(val, xform_ea(instr, regs, 0), 4, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, BYTEREV, 4);
+			op->val = byterev_4(regs->gpr[rd]);
+			break;
+
+		case 725:
+			if (rb == 0)
+				rb = 32;	/* # bytes to store */
+			op->type = MKOP(STORE_MULTI, 0, rb);
+			op->ea = 0;
+			if (ra)
+				op->ea = truncate_if_32bit(regs->msr,
+							   regs->gpr[ra]);
+			break;
 
 		case 790:	/* lhbrx */
-			err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs);
-			if (!err)
-				regs->gpr[rd] = byterev_2(val);
-			goto ldst_done;
+			op->type = MKOP(LOAD, BYTEREV, 2);
+			break;
 
 		case 918:	/* sthbrx */
-			val = byterev_2(regs->gpr[rd]);
-			err = write_mem(val, xform_ea(instr, regs, 0), 2, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, BYTEREV, 2);
+			op->val = byterev_2(regs->gpr[rd]);
+			break;
 
 #ifdef CONFIG_VSX
 		case 844:	/* lxvd2x */
 		case 876:	/* lxvd2ux */
 			if (!(regs->msr & MSR_VSX))
-				break;
-			rd |= (instr & 1) << 5;
-			ea = xform_ea(instr, regs, u);
-			err = do_vsx_load(rd, do_lxvd2x, ea, regs);
-			goto ldst_done;
+				goto vsxunavail;
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(LOAD_VSX, u, 16);
+			break;
 
 		case 972:	/* stxvd2x */
 		case 1004:	/* stxvd2ux */
 			if (!(regs->msr & MSR_VSX))
-				break;
-			rd |= (instr & 1) << 5;
-			ea = xform_ea(instr, regs, u);
-			err = do_vsx_store(rd, do_stxvd2x, ea, regs);
-			goto ldst_done;
+				goto vsxunavail;
+			op->reg = rd | ((instr & 1) << 5);
+			op->type = MKOP(STORE_VSX, u, 16);
+			break;
 
 #endif /* CONFIG_VSX */
 		}
@@ -1552,178 +1539,123 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 
 	case 32:	/* lwz */
 	case 33:	/* lwzu */
-		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 4, regs);
-		goto ldst_done;
+		op->type = MKOP(LOAD, u, 4);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 34:	/* lbz */
 	case 35:	/* lbzu */
-		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 1, regs);
-		goto ldst_done;
+		op->type = MKOP(LOAD, u, 1);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 36:	/* stw */
-		val = regs->gpr[rd];
-		err = write_mem(val, dform_ea(instr, regs), 4, regs);
-		goto ldst_done;
-
 	case 37:	/* stwu */
-		val = regs->gpr[rd];
-		val3 = dform_ea(instr, regs);
-		/*
-		 * For PPC32 we always use stwu to change stack point with r1. So
-		 * this emulated store may corrupt the exception frame, now we
-		 * have to provide the exception frame trampoline, which is pushed
-		 * below the kprobed function stack. So we only update gpr[1] but
-		 * don't emulate the real store operation. We will do real store
-		 * operation safely in exception return code by checking this flag.
-		 */
-		if ((ra == 1) && !(regs->msr & MSR_PR) \
-			&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
-#ifdef CONFIG_PPC32
-			/*
-			 * Check if we will touch kernel sack overflow
-			 */
-			if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
-				printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n");
-				err = -EINVAL;
-				break;
-			}
-#endif /* CONFIG_PPC32 */
-			/*
-			 * Check if we already set since that means we'll
-			 * lose the previous value.
-			 */
-			WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
-			set_thread_flag(TIF_EMULATE_STACK_STORE);
-			err = 0;
-		} else
-			err = write_mem(val, val3, 4, regs);
-		goto ldst_done;
+		op->type = MKOP(STORE, u, 4);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 38:	/* stb */
 	case 39:	/* stbu */
-		val = regs->gpr[rd];
-		err = write_mem(val, dform_ea(instr, regs), 1, regs);
-		goto ldst_done;
+		op->type = MKOP(STORE, u, 1);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 40:	/* lhz */
 	case 41:	/* lhzu */
-		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
-		goto ldst_done;
+		op->type = MKOP(LOAD, u, 2);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 42:	/* lha */
 	case 43:	/* lhau */
-		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
-		if (!err)
-			regs->gpr[rd] = (signed short) regs->gpr[rd];
-		goto ldst_done;
+		op->type = MKOP(LOAD, SIGNEXT | u, 2);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 44:	/* sth */
 	case 45:	/* sthu */
-		val = regs->gpr[rd];
-		err = write_mem(val, dform_ea(instr, regs), 2, regs);
-		goto ldst_done;
+		op->type = MKOP(STORE, u, 2);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 46:	/* lmw */
-		ra = (instr >> 16) & 0x1f;
 		if (ra >= rd)
 			break;		/* invalid form, ra in range to load */
-		ea = dform_ea(instr, regs);
-		do {
-			err = read_mem(&regs->gpr[rd], ea, 4, regs);
-			if (err)
-				return 0;
-			ea += 4;
-		} while (++rd < 32);
-		goto instr_done;
+		op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd));
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 47:	/* stmw */
-		ea = dform_ea(instr, regs);
-		do {
-			err = write_mem(regs->gpr[rd], ea, 4, regs);
-			if (err)
-				return 0;
-			ea += 4;
-		} while (++rd < 32);
-		goto instr_done;
+		op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd));
+		op->ea = dform_ea(instr, regs);
+		break;
 
 #ifdef CONFIG_PPC_FPU
 	case 48:	/* lfs */
 	case 49:	/* lfsu */
 		if (!(regs->msr & MSR_FP))
-			break;
-		ea = dform_ea(instr, regs);
-		err = do_fp_load(rd, do_lfs, ea, 4, regs);
-		goto ldst_done;
+			goto fpunavail;
+		op->type = MKOP(LOAD_FP, u, 4);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 50:	/* lfd */
 	case 51:	/* lfdu */
 		if (!(regs->msr & MSR_FP))
-			break;
-		ea = dform_ea(instr, regs);
-		err = do_fp_load(rd, do_lfd, ea, 8, regs);
-		goto ldst_done;
+			goto fpunavail;
+		op->type = MKOP(LOAD_FP, u, 8);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 52:	/* stfs */
 	case 53:	/* stfsu */
 		if (!(regs->msr & MSR_FP))
-			break;
-		ea = dform_ea(instr, regs);
-		err = do_fp_store(rd, do_stfs, ea, 4, regs);
-		goto ldst_done;
+			goto fpunavail;
+		op->type = MKOP(STORE_FP, u, 4);
+		op->ea = dform_ea(instr, regs);
+		break;
 
 	case 54:	/* stfd */
 	case 55:	/* stfdu */
 		if (!(regs->msr & MSR_FP))
-			break;
-		ea = dform_ea(instr, regs);
-		err = do_fp_store(rd, do_stfd, ea, 8, regs);
-		goto ldst_done;
+			goto fpunavail;
+		op->type = MKOP(STORE_FP, u, 8);
+		op->ea = dform_ea(instr, regs);
+		break;
 #endif
 
 #ifdef __powerpc64__
 	case 58:	/* ld[u], lwa */
+		op->ea = dsform_ea(instr, regs);
 		switch (instr & 3) {
 		case 0:		/* ld */
-			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
-				       8, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, 0, 8);
+			break;
 		case 1:		/* ldu */
-			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
-				       8, regs);
-			goto ldst_done;
+			op->type = MKOP(LOAD, UPDATE, 8);
+			break;
 		case 2:		/* lwa */
-			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
-				       4, regs);
-			if (!err)
-				regs->gpr[rd] = (signed int) regs->gpr[rd];
-			goto ldst_done;
+			op->type = MKOP(LOAD, SIGNEXT, 4);
+			break;
 		}
 		break;
 
 	case 62:	/* std[u] */
-		val = regs->gpr[rd];
+		op->ea = dsform_ea(instr, regs);
 		switch (instr & 3) {
 		case 0:		/* std */
-			err = write_mem(val, dsform_ea(instr, regs), 8, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, 0, 8);
+			break;
 		case 1:		/* stdu */
-			err = write_mem(val, dsform_ea(instr, regs), 8, regs);
-			goto ldst_done;
+			op->type = MKOP(STORE, UPDATE, 8);
+			break;
 		}
 		break;
 #endif /* __powerpc64__ */
 
 	}
-	err = -EINVAL;
-
- ldst_done:
-	if (err) {
-		regs->gpr[ra] = old_ra;
-		return 0;	/* invoke DSI if -EFAULT? */
-	}
- instr_done:
-	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
-	return 1;
+	return 0;
 
  logical_done:
 	if (instr & 1)
@@ -1733,5 +1665,349 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
  arith_done:
 	if (instr & 1)
 		set_cr0(regs, rd);
-	goto instr_done;
+
+ instr_done:
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+	return 1;
+
+ priv:
+	op->type = INTERRUPT | 0x700;
+	op->val = SRR1_PROGPRIV;
+	return 0;
+
+ trap:
+	op->type = INTERRUPT | 0x700;
+	op->val = SRR1_PROGTRAP;
+	return 0;
+
+#ifdef CONFIG_PPC_FPU
+ fpunavail:
+	op->type = INTERRUPT | 0x800;
+	return 0;
+#endif
+
+#ifdef CONFIG_ALTIVEC
+ vecunavail:
+	op->type = INTERRUPT | 0xf20;
+	return 0;
+#endif
+
+#ifdef CONFIG_VSX
+ vsxunavail:
+	op->type = INTERRUPT | 0xf40;
+	return 0;
+#endif
+}
+EXPORT_SYMBOL_GPL(analyse_instr);
+
+/*
+ * For PPC32 we always use stwu with r1 to change the stack pointer.
+ * So this emulated store may corrupt the exception frame, now we
+ * have to provide the exception frame trampoline, which is pushed
+ * below the kprobed function stack. So we only update gpr[1] but
+ * don't emulate the real store operation. We will do real store
+ * operation safely in exception return code by checking this flag.
+ */
+static __kprobes int handle_stack_update(unsigned long ea, struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC32
+	/*
+	 * Check if we will touch kernel stack overflow
+	 */
+	if (ea - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
+		printk(KERN_CRIT "Can't kprobe this since kernel stack would overflow.\n");
+		return -EINVAL;
+	}
+#endif /* CONFIG_PPC32 */
+	/*
+	 * Check if we already set since that means we'll
+	 * lose the previous value.
+	 */
+	WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
+	set_thread_flag(TIF_EMULATE_STACK_STORE);
+	return 0;
+}
+
+static __kprobes void do_signext(unsigned long *valp, int size)
+{
+	switch (size) {
+	case 2:
+		*valp = (signed short) *valp;
+		break;
+	case 4:
+		*valp = (signed int) *valp;
+		break;
+	}
+}
+
+static __kprobes void do_byterev(unsigned long *valp, int size)
+{
+	switch (size) {
+	case 2:
+		*valp = byterev_2(*valp);
+		break;
+	case 4:
+		*valp = byterev_4(*valp);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		*valp = byterev_8(*valp);
+		break;
+#endif
+	}
+}
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
+{
+	struct instruction_op op;
+	int r, err, size;
+	unsigned long val;
+	unsigned int cr;
+	int i, rd, nb;
+
+	r = analyse_instr(&op, regs, instr);
+	if (r != 0)
+		return r;
+
+	err = 0;
+	size = GETSIZE(op.type);
+	switch (op.type & INSTR_TYPE_MASK) {
+	case CACHEOP:
+		if (!address_ok(regs, op.ea, 8))
+			return 0;
+		switch (op.type & CACHEOP_MASK) {
+		case DCBST:
+			__cacheop_user_asmx(op.ea, err, "dcbst");
+			break;
+		case DCBF:
+			__cacheop_user_asmx(op.ea, err, "dcbf");
+			break;
+		case DCBTST:
+			if (op.reg == 0)
+				prefetchw((void *) op.ea);
+			break;
+		case DCBT:
+			if (op.reg == 0)
+				prefetch((void *) op.ea);
+			break;
+		case ICBI:
+			__cacheop_user_asmx(op.ea, err, "icbi");
+			break;
+		}
+		if (err)
+			return 0;
+		goto instr_done;
+
+	case LARX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		if (op.ea & (size - 1))
+			break;		/* can't handle misaligned */
+		err = -EFAULT;
+		if (!address_ok(regs, op.ea, size))
+			goto ldst_done;
+		err = 0;
+		switch (size) {
+		case 4:
+			__get_user_asmx(val, op.ea, err, "lwarx");
+			break;
+		case 8:
+			__get_user_asmx(val, op.ea, err, "ldarx");
+			break;
+		default:
+			return 0;
+		}
+		if (!err)
+			regs->gpr[op.reg] = val;
+		goto ldst_done;
+
+	case STCX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		if (op.ea & (size - 1))
+			break;		/* can't handle misaligned */
+		err = -EFAULT;
+		if (!address_ok(regs, op.ea, size))
+			goto ldst_done;
+		err = 0;
+		switch (size) {
+		case 4:
+			__put_user_asmx(op.val, op.ea, err, "stwcx.", cr);
+			break;
+		case 8:
+			__put_user_asmx(op.val, op.ea, err, "stdcx.", cr);
+			break;
+		default:
+			return 0;
+		}
+		if (!err)
+			regs->ccr = (regs->ccr & 0x0fffffff) |
+				(cr & 0xe0000000) |
+				((regs->xer >> 3) & 0x10000000);
+		goto ldst_done;
+
+	case LOAD:
+		if (regs->msr & MSR_LE)
+			return 0;
+		err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
+		if (!err) {
+			if (op.type & SIGNEXT)
+				do_signext(&regs->gpr[op.reg], size);
+			if (op.type & BYTEREV)
+				do_byterev(&regs->gpr[op.reg], size);
+		}
+		goto ldst_done;
+
+	case LOAD_FP:
+		if (regs->msr & MSR_LE)
+			return 0;
+		if (size == 4)
+			err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
+		else
+			err = do_fp_load(op.reg, do_lfd, op.ea, size, regs);
+		goto ldst_done;
+
+#ifdef CONFIG_ALTIVEC
+	case LOAD_VMX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
+		goto ldst_done;
+#endif
+#ifdef CONFIG_VSX
+	case LOAD_VSX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
+		goto ldst_done;
+#endif
+	case LOAD_MULTI:
+		if (regs->msr & MSR_LE)
+			return 0;
+		rd = op.reg;
+		for (i = 0; i < size; i += 4) {
+			nb = size - i;
+			if (nb > 4)
+				nb = 4;
+			err = read_mem(&regs->gpr[rd], op.ea, nb, regs);
+			if (err)
+				return 0;
+			if (nb < 4)	/* left-justify last bytes */
+				regs->gpr[rd] <<= 32 - 8 * nb;
+			op.ea += 4;
+			++rd;
+		}
+		goto instr_done;
+
+	case STORE:
+		if (regs->msr & MSR_LE)
+			return 0;
+		if ((op.type & UPDATE) && size == sizeof(long) &&
+		    op.reg == 1 && op.update_reg == 1 &&
+		    !(regs->msr & MSR_PR) &&
+		    op.ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+			err = handle_stack_update(op.ea, regs);
+			goto ldst_done;
+		}
+		err = write_mem(op.val, op.ea, size, regs);
+		goto ldst_done;
+
+	case STORE_FP:
+		if (regs->msr & MSR_LE)
+			return 0;
+		if (size == 4)
+			err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
+		else
+			err = do_fp_store(op.reg, do_stfd, op.ea, size, regs);
+		goto ldst_done;
+
+#ifdef CONFIG_ALTIVEC
+	case STORE_VMX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
+		goto ldst_done;
+#endif
+#ifdef CONFIG_VSX
+	case STORE_VSX:
+		if (regs->msr & MSR_LE)
+			return 0;
+		err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
+		goto ldst_done;
+#endif
+	case STORE_MULTI:
+		if (regs->msr & MSR_LE)
+			return 0;
+		rd = op.reg;
+		for (i = 0; i < size; i += 4) {
+			val = regs->gpr[rd];
+			nb = size - i;
+			if (nb > 4)
+				nb = 4;
+			else
+				val >>= 32 - 8 * nb;
+			err = write_mem(val, op.ea, nb, regs);
+			if (err)
+				return 0;
+			op.ea += 4;
+			++rd;
+		}
+		goto instr_done;
+
+	case MFMSR:
+		regs->gpr[op.reg] = regs->msr & MSR_MASK;
+		goto instr_done;
+
+	case MTMSR:
+		val = regs->gpr[op.reg];
+		if ((val & MSR_RI) == 0)
+			/* can't step mtmsr[d] that would clear MSR_RI */
+			return -1;
+		/* here op.val is the mask of bits to change */
+		regs->msr = (regs->msr & ~op.val) | (val & op.val);
+		goto instr_done;
+
+#ifdef CONFIG_PPC64
+	case SYSCALL:	/* sc */
+		/*
+		 * N.B. this uses knowledge about how the syscall
+		 * entry code works.  If that is changed, this will
+		 * need to be changed also.
+		 */
+		if (regs->gpr[0] == 0x1ebe &&
+		    cpu_has_feature(CPU_FTR_REAL_LE)) {
+			regs->msr ^= MSR_LE;
+			goto instr_done;
+		}
+		regs->gpr[9] = regs->gpr[13];
+		regs->gpr[10] = MSR_KERNEL;
+		regs->gpr[11] = regs->nip + 4;
+		regs->gpr[12] = regs->msr & MSR_MASK;
+		regs->gpr[13] = (unsigned long) get_paca();
+		regs->nip = (unsigned long) &system_call_common;
+		regs->msr = MSR_KERNEL;
+		return 1;
+
+	case RFI:
+		return -1;
+#endif
+	}
+	return 0;
+
+ ldst_done:
+	if (err)
+		return 0;
+	if (op.type & UPDATE)
+		regs->gpr[op.update_reg] = op.ea;
+
+ instr_done:
+	regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
+	return 1;
 }

+ 1 - 0
arch/powerpc/mm/Makefile

@@ -34,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
 obj-$(CONFIG_PPC_SUBPAGE_PROT)	+= subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_PPC_COPRO_BASE)	+= copro_fault.o

+ 62 - 7
arch/powerpc/platforms/cell/spu_fault.c → arch/powerpc/mm/copro_fault.c

@@ -1,5 +1,5 @@
 /*
- * SPU mm fault handler
+ * CoProcessor (SPU/AFU) mm fault handler
  *
  * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
  *
@@ -23,16 +23,17 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/export.h>
-
+#include <asm/reg.h>
+#include <asm/copro.h>
 #include <asm/spu.h>
-#include <asm/spu_csa.h>
+#include <misc/cxl.h>
 
 /*
  * This ought to be kept in sync with the powerpc specific do_page_fault
  * function. Currently, there are a few corner cases that we haven't had
  * to handle fortunately.
  */
-int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 		unsigned long dsisr, unsigned *flt)
 {
 	struct vm_area_struct *vma;
@@ -58,12 +59,12 @@ int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 			goto out_unlock;
 	}
 
-	is_write = dsisr & MFC_DSISR_ACCESS_PUT;
+	is_write = dsisr & DSISR_ISSTORE;
 	if (is_write) {
 		if (!(vma->vm_flags & VM_WRITE))
 			goto out_unlock;
 	} else {
-		if (dsisr & MFC_DSISR_ACCESS_DENIED)
+		if (dsisr & DSISR_PROTFAULT)
 			goto out_unlock;
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto out_unlock;
@@ -91,4 +92,58 @@ out_unlock:
 	up_read(&mm->mmap_sem);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(spu_handle_mm_fault);
+EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
+{
+	u64 vsid;
+	int psize, ssize;
+
+	slb->esid = (ea & ESID_MASK) | SLB_ESID_V;
+
+	switch (REGION_ID(ea)) {
+	case USER_REGION_ID:
+		pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+		psize = get_slice_psize(mm, ea);
+		ssize = user_segment_size(ea);
+		vsid = get_vsid(mm->context.id, ea, ssize);
+		break;
+	case VMALLOC_REGION_ID:
+		pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
+		if (ea < VMALLOC_END)
+			psize = mmu_vmalloc_psize;
+		else
+			psize = mmu_io_psize;
+		ssize = mmu_kernel_ssize;
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		break;
+	case KERNEL_REGION_ID:
+		pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
+		psize = mmu_linear_psize;
+		ssize = mmu_kernel_ssize;
+		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+		break;
+	default:
+		pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
+		return 1;
+	}
+
+	vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER;
+
+	vsid |= mmu_psize_defs[psize].sllp |
+		((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
+
+	slb->vsid = vsid;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(copro_calculate_slb);
+
+void copro_flush_all_slbs(struct mm_struct *mm)
+{
+#ifdef CONFIG_SPU_BASE
+	spu_flush_all_slbs(mm);
+#endif
+	cxl_slbia(mm);
+}
+EXPORT_SYMBOL_GPL(copro_flush_all_slbs);

+ 28 - 15
arch/powerpc/mm/fault.c

@@ -33,6 +33,7 @@
 #include <linux/magic.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
+#include <linux/hugetlb.h>
 
 #include <asm/firmware.h>
 #include <asm/page.h>
@@ -114,22 +115,37 @@ static int store_updates_sp(struct pt_regs *regs)
 #define MM_FAULT_CONTINUE	-1
 #define MM_FAULT_ERR(sig)	(sig)
 
-static int do_sigbus(struct pt_regs *regs, unsigned long address)
+static int do_sigbus(struct pt_regs *regs, unsigned long address,
+		     unsigned int fault)
 {
 	siginfo_t info;
+	unsigned int lsb = 0;
 
 	up_read(&current->mm->mmap_sem);
 
-	if (user_mode(regs)) {
-		current->thread.trap_nr = BUS_ADRERR;
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRERR;
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGBUS, &info, current);
-		return MM_FAULT_RETURN;
+	if (!user_mode(regs))
+		return MM_FAULT_ERR(SIGBUS);
+
+	current->thread.trap_nr = BUS_ADRERR;
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void __user *)address;
+#ifdef CONFIG_MEMORY_FAILURE
+	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+		pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+			current->comm, current->pid, address);
+		info.si_code = BUS_MCEERR_AR;
 	}
-	return MM_FAULT_ERR(SIGBUS);
+
+	if (fault & VM_FAULT_HWPOISON_LARGE)
+		lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+	if (fault & VM_FAULT_HWPOISON)
+		lsb = PAGE_SHIFT;
+#endif
+	info.si_addr_lsb = lsb;
+	force_sig_info(SIGBUS, &info, current);
+	return MM_FAULT_RETURN;
 }
 
 static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
@@ -170,11 +186,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
 		return MM_FAULT_RETURN;
 	}
 
-	/* Bus error. x86 handles HWPOISON here, we'll add this if/when
-	 * we support the feature in HW
-	 */
-	if (fault & VM_FAULT_SIGBUS)
-		return do_sigbus(regs, addr);
+	if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE))
+		return do_sigbus(regs, addr, fault);
 
 	/* We don't understand the fault code, this is fatal */
 	BUG();

+ 5 - 1
arch/powerpc/mm/hash_native_64.c

@@ -29,6 +29,8 @@
 #include <asm/kexec.h>
 #include <asm/ppc-opcode.h>
 
+#include <misc/cxl.h>
+
 #ifdef DEBUG_LOW
 #define DBG_LOW(fmt...) udbg_printf(fmt)
 #else
@@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
 static inline void tlbie(unsigned long vpn, int psize, int apsize,
 			 int ssize, int local)
 {
-	unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
+	unsigned int use_local;
 	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
 
+	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();
+
 	if (use_local)
 		use_local = mmu_psize_defs[psize].tlbiel;
 	if (lock_tlbie && !use_local)

+ 84 - 76
arch/powerpc/mm/hash_utils_64.c

@@ -51,7 +51,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cputable.h>
 #include <asm/sections.h>
-#include <asm/spu.h>
+#include <asm/copro.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
 #include <asm/fadump.h>
@@ -92,12 +92,14 @@ extern unsigned long dart_tablebase;
 
 static unsigned long _SDR1;
 struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
 
 struct hash_pte *htab_address;
 unsigned long htab_size_bytes;
 unsigned long htab_hash_mask;
 EXPORT_SYMBOL_GPL(htab_hash_mask);
 int mmu_linear_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_linear_psize);
 int mmu_virtual_psize = MMU_PAGE_4K;
 int mmu_vmalloc_psize = MMU_PAGE_4K;
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
@@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K;
 #endif
 int mmu_io_psize = MMU_PAGE_4K;
 int mmu_kernel_ssize = MMU_SEGSIZE_256M;
+EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
 int mmu_highuser_ssize = MMU_SEGSIZE_256M;
 u16 mmu_slb_size = 64;
 EXPORT_SYMBOL_GPL(mmu_slb_size);
@@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 		return 0;
 
 	prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
-	if (prop != NULL) {
-		pr_info("Page sizes from device-tree:\n");
-		size /= 4;
-		cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
-		while(size > 0) {
-			unsigned int base_shift = be32_to_cpu(prop[0]);
-			unsigned int slbenc = be32_to_cpu(prop[1]);
-			unsigned int lpnum = be32_to_cpu(prop[2]);
-			struct mmu_psize_def *def;
-			int idx, base_idx;
-
-			size -= 3; prop += 3;
-			base_idx = get_idx_from_shift(base_shift);
-			if (base_idx < 0) {
-				/*
-				 * skip the pte encoding also
-				 */
-				prop += lpnum * 2; size -= lpnum * 2;
+	if (!prop)
+		return 0;
+
+	pr_info("Page sizes from device-tree:\n");
+	size /= 4;
+	cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
+	while(size > 0) {
+		unsigned int base_shift = be32_to_cpu(prop[0]);
+		unsigned int slbenc = be32_to_cpu(prop[1]);
+		unsigned int lpnum = be32_to_cpu(prop[2]);
+		struct mmu_psize_def *def;
+		int idx, base_idx;
+
+		size -= 3; prop += 3;
+		base_idx = get_idx_from_shift(base_shift);
+		if (base_idx < 0) {
+			/* skip the pte encoding also */
+			prop += lpnum * 2; size -= lpnum * 2;
+			continue;
+		}
+		def = &mmu_psize_defs[base_idx];
+		if (base_idx == MMU_PAGE_16M)
+			cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
+
+		def->shift = base_shift;
+		if (base_shift <= 23)
+			def->avpnm = 0;
+		else
+			def->avpnm = (1 << (base_shift - 23)) - 1;
+		def->sllp = slbenc;
+		/*
+		 * We don't know for sure what's up with tlbiel, so
+		 * for now we only set it for 4K and 64K pages
+		 */
+		if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
+			def->tlbiel = 1;
+		else
+			def->tlbiel = 0;
+
+		while (size > 0 && lpnum) {
+			unsigned int shift = be32_to_cpu(prop[0]);
+			int penc  = be32_to_cpu(prop[1]);
+
+			prop += 2; size -= 2;
+			lpnum--;
+
+			idx = get_idx_from_shift(shift);
+			if (idx < 0)
 				continue;
-			}
-			def = &mmu_psize_defs[base_idx];
-			if (base_idx == MMU_PAGE_16M)
-				cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
-
-			def->shift = base_shift;
-			if (base_shift <= 23)
-				def->avpnm = 0;
-			else
-				def->avpnm = (1 << (base_shift - 23)) - 1;
-			def->sllp = slbenc;
-			/*
-			 * We don't know for sure what's up with tlbiel, so
-			 * for now we only set it for 4K and 64K pages
-			 */
-			if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
-				def->tlbiel = 1;
-			else
-				def->tlbiel = 0;
-
-			while (size > 0 && lpnum) {
-				unsigned int shift = be32_to_cpu(prop[0]);
-				int penc  = be32_to_cpu(prop[1]);
-
-				prop += 2; size -= 2;
-				lpnum--;
-
-				idx = get_idx_from_shift(shift);
-				if (idx < 0)
-					continue;
-
-				if (penc == -1)
-					pr_err("Invalid penc for base_shift=%d "
-					       "shift=%d\n", base_shift, shift);
-
-				def->penc[idx] = penc;
-				pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
-					" avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
-					base_shift, shift, def->sllp,
-					def->avpnm, def->tlbiel, def->penc[idx]);
-			}
+
+			if (penc == -1)
+				pr_err("Invalid penc for base_shift=%d "
+				       "shift=%d\n", base_shift, shift);
+
+			def->penc[idx] = penc;
+			pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+				" avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+				base_shift, shift, def->sllp,
+				def->avpnm, def->tlbiel, def->penc[idx]);
 		}
-		return 1;
 	}
-	return 0;
+
+	return 1;
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
@@ -867,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
 }
 
 #ifdef CONFIG_PPC_MM_SLICES
-unsigned int get_paca_psize(unsigned long addr)
+static unsigned int get_paca_psize(unsigned long addr)
 {
 	u64 lpsizes;
 	unsigned char *hpsizes;
@@ -901,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
 		return;
 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
-#ifdef CONFIG_SPU_BASE
-	spu_flush_all_slbs(mm);
-#endif
-	if (get_paca_psize(addr) != MMU_PAGE_4K) {
+	copro_flush_all_slbs(mm);
+	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
 		get_paca()->context = mm->context;
 		slb_flush_and_rebolt();
 	}
@@ -989,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
  * -1 - critical hash insertion error
  * -2 - access not permitted by subpage protection mechanism
  */
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap)
 {
 	enum ctx_state prev_state = exception_enter();
 	pgd_t *pgdir;
 	unsigned long vsid;
-	struct mm_struct *mm;
 	pte_t *ptep;
 	unsigned hugeshift;
 	const struct cpumask *tmp;
@@ -1008,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
  	switch (REGION_ID(ea)) {
 	case USER_REGION_ID:
 		user_region = 1;
-		mm = current->mm;
 		if (! mm) {
 			DBG_LOW(" user region with no mm !\n");
 			rc = 1;
@@ -1019,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 		vsid = get_vsid(mm->context.id, ea, ssize);
 		break;
 	case VMALLOC_REGION_ID:
-		mm = &init_mm;
 		vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
 		if (ea < VMALLOC_END)
 			psize = mmu_vmalloc_psize;
@@ -1104,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 			WARN_ON(1);
 		}
 #endif
-		check_paca_psize(ea, mm, psize, user_region);
+		if (current->mm == mm)
+			check_paca_psize(ea, mm, psize, user_region);
 
 		goto bail;
 	}
@@ -1141,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
 			       "to 4kB pages because of "
 			       "non-cacheable mapping\n");
 			psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-#ifdef CONFIG_SPU_BASE
-			spu_flush_all_slbs(mm);
-#endif
+			copro_flush_all_slbs(mm);
 		}
 	}
 
-	check_paca_psize(ea, mm, psize, user_region);
+	if (current->mm == mm)
+		check_paca_psize(ea, mm, psize, user_region);
 #endif /* CONFIG_PPC_64K_PAGES */
 
 #ifdef CONFIG_PPC_HAS_HASH_64K
@@ -1182,6 +1179,17 @@ bail:
 	exception_exit(prev_state);
 	return rc;
 }
+EXPORT_SYMBOL_GPL(hash_page_mm);
+
+int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
+{
+	struct mm_struct *mm = current->mm;
+
+	if (REGION_ID(ea) == VMALLOC_REGION_ID)
+		mm = &init_mm;
+
+	return hash_page_mm(mm, ea, access, trap);
+}
 EXPORT_SYMBOL_GPL(hash_page);
 
 void hash_preload(struct mm_struct *mm, unsigned long ea,

+ 2 - 2
arch/powerpc/mm/init_32.c

@@ -106,11 +106,11 @@ unsigned long __max_low_memory = MAX_LOW_MEM;
 void MMU_setup(void)
 {
 	/* Check for nobats option (used in mapin_ram). */
-	if (strstr(cmd_line, "nobats")) {
+	if (strstr(boot_command_line, "nobats")) {
 		__map_without_bats = 1;
 	}
 
-	if (strstr(cmd_line, "noltlbs")) {
+	if (strstr(boot_command_line, "noltlbs")) {
 		__map_without_ltlbs = 1;
 	}
 #ifdef CONFIG_DEBUG_PAGEALLOC

+ 0 - 3
arch/powerpc/mm/init_64.c

@@ -233,9 +233,6 @@ static void __meminit vmemmap_create_mapping(unsigned long start,
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-extern int htab_remove_mapping(unsigned long vstart, unsigned long vend,
-			int psize, int ssize);
-
 static void vmemmap_remove_mapping(unsigned long start,
 				   unsigned long page_size)
 {

+ 62 - 6
arch/powerpc/mm/mem.c

@@ -260,6 +260,60 @@ static int __init mark_nonram_nosave(void)
 	}
 	return 0;
 }
+#else /* CONFIG_NEED_MULTIPLE_NODES */
+static int __init mark_nonram_nosave(void)
+{
+	return 0;
+}
+#endif
+
+static bool zone_limits_final;
+
+static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+	[0 ... MAX_NR_ZONES - 1] = ~0UL
+};
+
+/*
+ * Restrict the specified zone and all more restrictive zones
+ * to be below the specified pfn.  May not be called after
+ * paging_init().
+ */
+void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit)
+{
+	int i;
+
+	if (WARN_ON(zone_limits_final))
+		return;
+
+	for (i = zone; i >= 0; i--) {
+		if (max_zone_pfns[i] > pfn_limit)
+			max_zone_pfns[i] = pfn_limit;
+	}
+}
+
+/*
+ * Find the least restrictive zone that is entirely below the
+ * specified pfn limit.  Returns < 0 if no suitable zone is found.
+ *
+ * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit
+ * systems -- the DMA limit can be higher than any possible real pfn.
+ */
+int dma_pfn_limit_to_zone(u64 pfn_limit)
+{
+	enum zone_type top_zone = ZONE_NORMAL;
+	int i;
+
+#ifdef CONFIG_HIGHMEM
+	top_zone = ZONE_HIGHMEM;
+#endif
+
+	for (i = top_zone; i >= 0; i--) {
+		if (max_zone_pfns[i] <= pfn_limit)
+			return i;
+	}
+
+	return -EPERM;
+}
 
 /*
  * paging_init() sets up the page tables - in fact we've already done this.
@@ -268,7 +322,7 @@ void __init paging_init(void)
 {
 	unsigned long long total_ram = memblock_phys_mem_size();
 	phys_addr_t top_of_ram = memblock_end_of_DRAM();
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	enum zone_type top_zone;
 
 #ifdef CONFIG_PPC32
 	unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
@@ -290,18 +344,20 @@ void __init paging_init(void)
 	       (unsigned long long)top_of_ram, total_ram);
 	printk(KERN_DEBUG "Memory hole size: %ldMB\n",
 	       (long int)((top_of_ram - total_ram) >> 20));
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
 #ifdef CONFIG_HIGHMEM
-	max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
-	max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT;
+	top_zone = ZONE_HIGHMEM;
+	limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT);
 #else
-	max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+	top_zone = ZONE_NORMAL;
 #endif
+
+	limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT);
+	zone_limits_final = true;
 	free_area_init_nodes(max_zone_pfns);
 
 	mark_nonram_nosave();
 }
-#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
 
 static void __init register_page_bootmem_info(void)
 {

+ 10 - 17
arch/powerpc/mm/numa.c

@@ -538,7 +538,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
  */
 static int numa_setup_cpu(unsigned long lcpu)
 {
-	int nid;
+	int nid = -1;
 	struct device_node *cpu;
 
 	/*
@@ -555,19 +555,21 @@ static int numa_setup_cpu(unsigned long lcpu)
 
 	if (!cpu) {
 		WARN_ON(1);
-		nid = 0;
-		goto out;
+		if (cpu_present(lcpu))
+			goto out_present;
+		else
+			goto out;
 	}
 
 	nid = of_node_to_nid_single(cpu);
 
+out_present:
 	if (nid < 0 || !node_online(nid))
 		nid = first_online_node;
-out:
-	map_cpu_to_node(lcpu, nid);
 
+	map_cpu_to_node(lcpu, nid);
 	of_node_put(cpu);
-
+out:
 	return nid;
 }
 
@@ -1127,20 +1129,11 @@ void __init do_init_bootmem(void)
 	 * even before we online them, so that we can use cpu_to_{node,mem}
 	 * early in boot, cf. smp_prepare_cpus().
 	 */
-	for_each_possible_cpu(cpu) {
-		cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
-				  (void *)(unsigned long)cpu);
+	for_each_present_cpu(cpu) {
+		numa_setup_cpu((unsigned long)cpu);
 	}
 }
 
-void __init paging_init(void)
-{
-	unsigned long max_zone_pfns[MAX_NR_ZONES];
-	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-	max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
-	free_area_init_nodes(max_zone_pfns);
-}
-
 static int __init early_numa(char *p)
 {
 	if (!p)

+ 1 - 1
arch/powerpc/mm/pgtable.c

@@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte)
 	    (_PAGE_PRESENT | _PAGE_USER);
 }
 
-struct page * maybe_pte_to_page(pte_t pte)
+static struct page *maybe_pte_to_page(pte_t pte)
 {
 	unsigned long pfn = pte_pfn(pte);
 	struct page *page;

Some files were not shown because too many files changed in this diff