Merge remote-tracking branch 'origin/drm-intel-next-queued' into drm-intel-next-queued

Pull in patches Jani applied while I was on vacation.

Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Daniel Vetter 10 years ago
commit 5e1249ff36
100 files changed with 2642 additions and 247 deletions
  1. Documentation/ABI/testing/sysfs-devices-system-cpu (+1, -1)
  2. Documentation/DocBook/drm.tmpl (+12, -0)
  3. Documentation/devicetree/bindings/clock/silabs,si5351.txt (+3, -1)
  4. Documentation/devicetree/bindings/drm/tilcdc/slave.txt (+0, -18)
  5. Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt (+27, -0)
  6. Documentation/devicetree/bindings/net/cdns-emac.txt (+2, -1)
  7. Documentation/hwmon/tmp401 (+1, -1)
  8. Documentation/target/tcmu-design.txt (+7, -26)
  9. Documentation/virtual/kvm/mmu.txt (+14, -4)
  10. MAINTAINERS (+18, -7)
  11. Makefile (+1, -1)
  12. arch/alpha/boot/Makefile (+10, -6)
  13. arch/alpha/boot/main.c (+0, -1)
  14. arch/alpha/boot/stdio.c (+306, -0)
  15. arch/alpha/boot/tools/objstrip.c (+3, -0)
  16. arch/alpha/include/asm/types.h (+0, -1)
  17. arch/alpha/include/asm/unistd.h (+1, -1)
  18. arch/alpha/include/uapi/asm/unistd.h (+3, -0)
  19. arch/alpha/kernel/err_ev6.c (+0, -1)
  20. arch/alpha/kernel/irq.c (+0, -1)
  21. arch/alpha/kernel/osf_sys.c (+1, -2)
  22. arch/alpha/kernel/process.c (+3, -4)
  23. arch/alpha/kernel/smp.c (+1, -7)
  24. arch/alpha/kernel/srmcons.c (+1, -2)
  25. arch/alpha/kernel/sys_marvel.c (+1, -1)
  26. arch/alpha/kernel/systbls.S (+3, -0)
  27. arch/alpha/kernel/traps.c (+0, -1)
  28. arch/alpha/oprofile/op_model_ev4.c (+0, -1)
  29. arch/alpha/oprofile/op_model_ev5.c (+0, -1)
  30. arch/alpha/oprofile/op_model_ev6.c (+0, -1)
  31. arch/alpha/oprofile/op_model_ev67.c (+0, -1)
  32. arch/arm/boot/dts/Makefile (+1, -1)
  33. arch/arm/boot/dts/am335x-boneblack.dts (+0, -4)
  34. arch/arm/boot/dts/am335x-evmsk.dts (+1, -1)
  35. arch/arm/boot/dts/exynos4412-trats2.dts (+1, -1)
  36. arch/arm/boot/dts/imx27.dtsi (+1, -1)
  37. arch/arm/boot/dts/omap3-devkit8000.dts (+2, -0)
  38. arch/arm/boot/dts/zynq-7000.dtsi (+2, -2)
  39. arch/arm/configs/multi_v7_defconfig (+1, -1)
  40. arch/arm/kernel/entry-common.S (+3, -1)
  41. arch/arm/kernel/perf_event_cpu.c (+5, -4)
  42. arch/arm/mach-imx/gpc.c (+13, -3)
  43. arch/arm/mach-pxa/pxa_cplds_irqs.c (+1, -1)
  44. arch/arm/mm/mmu.c (+10, -10)
  45. arch/arm/xen/enlighten.c (+1, -0)
  46. arch/ia64/pci/pci.c (+10, -3)
  47. arch/mips/ath79/prom.c (+3, -0)
  48. arch/mips/configs/fuloong2e_defconfig (+1, -1)
  49. arch/mips/kernel/irq.c (+1, -1)
  50. arch/mips/kernel/smp-bmips.c (+1, -1)
  51. arch/mips/lib/strnlen_user.S (+13, -2)
  52. arch/powerpc/kernel/mce.c (+2, -2)
  53. arch/powerpc/kernel/vmlinux.lds.S (+1, -0)
  54. arch/powerpc/kvm/book3s_hv.c (+3, -2)
  55. arch/powerpc/mm/hugetlbpage.c (+16, -9)
  56. arch/powerpc/mm/pgtable_64.c (+11, -0)
  57. arch/s390/crypto/ghash_s390.c (+13, -12)
  58. arch/s390/crypto/prng.c (+1, -1)
  59. arch/s390/include/asm/pgtable.h (+1, -1)
  60. arch/s390/net/bpf_jit_comp.c (+10, -9)
  61. arch/x86/include/asm/kvm_host.h (+3, -0)
  62. arch/x86/include/uapi/asm/msr-index.h (+1, -0)
  63. arch/x86/kernel/cpu/mcheck/mce.c (+5, -2)
  64. arch/x86/kernel/i387.c (+15, -0)
  65. arch/x86/kvm/cpuid.c (+4, -0)
  66. arch/x86/kvm/cpuid.h (+8, -0)
  67. arch/x86/kvm/mmu.c (+12, -4)
  68. arch/x86/kvm/mmu.h (+2, -2)
  69. arch/x86/kvm/paging_tmpl.h (+7, -0)
  70. arch/x86/kvm/svm.c (+1, -0)
  71. arch/x86/kvm/vmx.c (+1, -0)
  72. arch/x86/kvm/x86.c (+19, -7)
  73. arch/x86/net/bpf_jit_comp.c (+6, -1)
  74. arch/x86/pci/acpi.c (+10, -3)
  75. arch/xtensa/include/asm/dma-mapping.h (+13, -0)
  76. block/blk-core.c (+3, -2)
  77. crypto/Kconfig (+0, -9)
  78. crypto/algif_aead.c (+4, -5)
  79. drivers/block/nvme-scsi.c (+2, -1)
  80. drivers/bluetooth/ath3k.c (+4, -0)
  81. drivers/bluetooth/btusb.c (+3, -0)
  82. drivers/bus/mips_cdmm.c (+2, -2)
  83. drivers/clk/clk-si5351.c (+45, -18)
  84. drivers/clk/clk.c (+8, -0)
  85. drivers/clk/qcom/gcc-msm8916.c (+2, -2)
  86. drivers/clk/samsung/Makefile (+1, -1)
  87. drivers/clk/samsung/clk-exynos5420.c (+1, -0)
  88. drivers/clk/samsung/clk-exynos5433.c (+6, -6)
  89. drivers/gpio/gpio-kempld.c (+1, -1)
  90. drivers/gpio/gpiolib.c (+6, -4)
  91. drivers/gpu/drm/Kconfig (+2, -0)
  92. drivers/gpu/drm/Makefile (+2, -1)
  93. drivers/gpu/drm/amd/amdkfd/Makefile (+2, -1)
  94. drivers/gpu/drm/amd/amdkfd/kfd_chardev.c (+308, -0)
  95. drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c (+886, -0)
  96. drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h (+193, -0)
  97. drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c (+168, -0)
  98. drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h (+294, -0)
  99. drivers/gpu/drm/amd/amdkfd/kfd_device.c (+5, -0)
  100. drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c (+39, -9)

+ 1 - 1
Documentation/ABI/testing/sysfs-devices-system-cpu

@@ -162,7 +162,7 @@ Description:	Discover CPUs in the same CPU frequency coordination domain
 What:		/sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
 Date:		August 2008
 KernelVersion:	2.6.27
-Contact:	discuss@x86-64.org
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Disable L3 cache indices
 
 		These files exist in every CPU's cache/index3 directory. Each

+ 12 - 0
Documentation/DocBook/drm.tmpl

@@ -2439,6 +2439,18 @@ void intel_crt_init(struct drm_device *dev)
 	  <title>Tile group</title>
 !Pdrivers/gpu/drm/drm_crtc.c Tile group
     </sect2>
+    <sect2>
+	<title>Bridges</title>
+      <sect3>
+	 <title>Overview</title>
+!Pdrivers/gpu/drm/drm_bridge.c overview
+      </sect3>
+      <sect3>
+	 <title>Default bridge callback sequence</title>
+!Pdrivers/gpu/drm/drm_bridge.c bridge callbacks
+      </sect3>
+!Edrivers/gpu/drm/drm_bridge.c
+    </sect2>
   </sect1>
 
   <!-- Internals: kms properties -->

+ 3 - 1
Documentation/devicetree/bindings/clock/silabs,si5351.txt

@@ -17,7 +17,8 @@ Required properties:
 - #clock-cells: from common clock binding; shall be set to 1.
 - clocks: from common clock binding; list of parent clock
   handles, shall be xtal reference clock or xtal and clkin for
-  si5351c only.
+  si5351c only. Corresponding clock input names are "xtal" and
+  "clkin" respectively.
 - #address-cells: shall be set to 1.
 - #size-cells: shall be set to 0.
 
@@ -71,6 +72,7 @@ i2c-master-node {
 
 		/* connect xtal input to 25MHz reference */
 		clocks = <&ref25>;
+		clock-names = "xtal";
 
 		/* connect xtal input as source of pll0 and pll1 */
 		silabs,pll-source = <0 0>, <1 0>;

+ 0 - 18
Documentation/devicetree/bindings/drm/tilcdc/slave.txt

@@ -1,18 +0,0 @@
-Device-Tree bindings for tilcdc DRM encoder slave output driver
-
-Required properties:
- - compatible: value should be "ti,tilcdc,slave".
- - i2c: the phandle for the i2c device the encoder slave is connected to
-
-Recommended properties:
- - pinctrl-names, pinctrl-0: the pincontrol settings to configure
-   muxing properly for pins that connect to TFP410 device
-
-Example:
-
-	hdmi {
-		compatible = "ti,tilcdc,slave";
-		i2c = <&i2c0>;
-		pinctrl-names = "default";
-		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
-	};

+ 27 - 0
Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt

@@ -18,6 +18,12 @@ Optional properties:
  - max-pixelclock: The maximum pixel clock that can be supported
    by the lcd controller in KHz.
 
+Optional nodes:
+
+ - port/ports: to describe a connection to an external encoder. The
+   binding follows Documentation/devicetree/bindings/graph.txt and
+   supports a single port with a single endpoint.
+
 Example:
 
 	fb: fb@4830e000 {
@@ -26,4 +32,25 @@ Example:
 		interrupt-parent = <&intc>;
 		interrupts = <36>;
 		ti,hwmods = "lcdc";
+
+		port {
+			lcdc_0: endpoint@0 {
+				remote-endpoint = <&hdmi_0>;
+			};
+		};
+	};
+
+	tda19988: tda19988 {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+
+		pinctrl-names = "default", "off";
+		pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+		pinctrl-1 = <&nxp_hdmi_bonelt_off_pins>;
+
+		port {
+			hdmi_0: endpoint@0 {
+				remote-endpoint = <&lcdc_0>;
+			};
+		};
 	};

+ 2 - 1
Documentation/devicetree/bindings/net/cdns-emac.txt

@@ -3,7 +3,8 @@
 Required properties:
 - compatible: Should be "cdns,[<chip>-]{emac}"
   Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC.
-  or the generic form: "cdns,emac".
+  Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
+  Or the generic form: "cdns,emac".
 - reg: Address and length of the register set for the device
 - interrupts: Should contain macb interrupt
 - phy-mode: see ethernet.txt file in the same directory.

+ 1 - 1
Documentation/hwmon/tmp401

@@ -20,7 +20,7 @@ Supported chips:
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html
   * Texas Instruments TMP435
     Prefix: 'tmp435'
-    Addresses scanned: I2C 0x37, 0x48 - 0x4f
+    Addresses scanned: I2C 0x48 - 0x4f
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html
 
 Authors:

+ 7 - 26
Documentation/target/tcmu-design.txt

@@ -15,8 +15,7 @@ Contents:
   a) Discovering and configuring TCMU uio devices
   b) Waiting for events on the device(s)
   c) Managing the command ring
-3) Command filtering and pass_level
-4) A final note
+3) A final note
 
 
 TCM Userspace Design
@@ -324,7 +323,7 @@ int handle_device_events(int fd, void *map)
   /* Process events from cmd ring until we catch up with cmd_head */
   while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
 
-    if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
+    if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
       uint8_t *cdb = (void *)mb + ent->req.cdb_off;
       bool success = true;
 
@@ -339,8 +338,12 @@ int handle_device_events(int fd, void *map)
         ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
       }
     }
+    else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
+      /* Tell the kernel we didn't handle unknown opcodes */
+      ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
+    }
     else {
-      /* Do nothing for PAD entries */
+      /* Do nothing for PAD entries except update cmd_tail */
     }
 
     /* update cmd_tail */
@@ -360,28 +363,6 @@ int handle_device_events(int fd, void *map)
 }
 
 
-Command filtering and pass_level
---------------------------------
-
-TCMU supports a "pass_level" option with valid values of 0 or 1.  When
-the value is 0 (the default), nearly all SCSI commands received for
-the device are passed through to the handler. This allows maximum
-flexibility but increases the amount of code required by the handler,
-to support all mandatory SCSI commands. If pass_level is set to 1,
-then only IO-related commands are presented, and the rest are handled
-by LIO's in-kernel command emulation. The commands presented at level
-1 include all versions of:
-
-READ
-WRITE
-WRITE_VERIFY
-XDWRITEREAD
-WRITE_SAME
-COMPARE_AND_WRITE
-SYNCHRONIZE_CACHE
-UNMAP
-
-
 A final note
 ------------
 
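For reference, the example above now passes the packed len_op word to tcmu_hdr_get_op() instead of a header pointer, and flags unhandled opcodes back to the kernel through hdr.uflags. A minimal sketch of that accessor, with the mask value restated from include/uapi/linux/target_core_user.h (treat it as illustrative rather than authoritative):

    #include <stdint.h>

    #define TCMU_OP_MASK 0x7	/* restated from target_core_user.h */

    /* sketch: the opcode is packed into the low bits of len_op */
    static inline uint32_t tcmu_hdr_get_op(uint32_t len_op)
    {
    	return len_op & TCMU_OP_MASK;
    }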

+ 14 - 4
Documentation/virtual/kvm/mmu.txt

@@ -169,6 +169,10 @@ Shadow pages contain the following information:
     Contains the value of cr4.smep && !cr0.wp for which the page is valid
     (pages for which this is true are different from other pages; see the
     treatment of cr0.wp=0 below).
+  role.smap_andnot_wp:
+    Contains the value of cr4.smap && !cr0.wp for which the page is valid
+    (pages for which this is true are different from other pages; see the
+    treatment of cr0.wp=0 below).
   gfn:
     Either the guest page table containing the translations shadowed by this
     page, or the base page frame for linear translations.  See role.direct.
@@ -344,10 +348,16 @@ on fault type:
 
 (user write faults generate a #PF)
 
-In the first case there is an additional complication if CR4.SMEP is
-enabled: since we've turned the page into a kernel page, the kernel may now
-execute it.  We handle this by also setting spte.nx.  If we get a user
-fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back.
+In the first case there are two additional complications:
+- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
+  the kernel may now execute it.  We handle this by also setting spte.nx.
+  If we get a user fetch or read fault, we'll change spte.u=1 and
+  spte.nx=gpte.nx back.
+- if CR4.SMAP is disabled: since the page has been changed to a kernel
+  page, it can not be reused when CR4.SMAP is enabled. We set
+  CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
+  here we do not care about the case that CR4.SMAP is enabled since KVM will
+  directly inject #PF to guest due to failed permission check.
 
 To prevent an spte that was converted into a kernel page with cr0.wp=0
 from being written by the kernel after cr0.wp has changed to 1, we make
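In code, the new role.smap_andnot_wp bit is derived exactly like the existing SMEP one; the kvm_init_shadow_mmu() hunk in arch/x86/kvm/mmu.c later in this commit reduces to this fragment (a schematic excerpt, not a complete function):

    bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
    bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);

    /* a page shadowed while cr0.wp=0 is only valid for one (smep, smap) flavour */
    context->base_role.smep_andnot_wp = smep && !is_write_protection(vcpu);
    context->base_role.smap_andnot_wp = smap && !is_write_protection(vcpu);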

+ 18 - 7
MAINTAINERS

@@ -2427,7 +2427,6 @@ L:	linux-security-module@vger.kernel.org
 S:	Supported
 F:	include/linux/capability.h
 F:	include/uapi/linux/capability.h
-F:	security/capability.c
 F:	security/commoncap.c
 F:	kernel/capability.c
 
@@ -3825,10 +3824,11 @@ M:	David Woodhouse <dwmw2@infradead.org>
 L:	linux-embedded@vger.kernel.org
 S:	Maintained
 
-EMULEX LPFC FC SCSI DRIVER
-M:	James Smart <james.smart@emulex.com>
+EMULEX/AVAGO LPFC FC/FCOE SCSI DRIVER
+M:	James Smart <james.smart@avagotech.com>
+M:	Dick Kennedy <dick.kennedy@avagotech.com>
 L:	linux-scsi@vger.kernel.org
-W:	http://sourceforge.net/projects/lpfcxxxx
+W:	http://www.avagotech.com
 S:	Supported
 F:	drivers/scsi/lpfc/
 
@@ -4536,7 +4536,7 @@ M:	Jean Delvare <jdelvare@suse.de>
 M:	Guenter Roeck <linux@roeck-us.net>
 L:	lm-sensors@lm-sensors.org
 W:	http://www.lm-sensors.org/
-T:	quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/
+T:	quilt http://jdelvare.nerim.net/devel/linux/jdelvare-hwmon/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git
 S:	Maintained
 F:	Documentation/hwmon/
@@ -8829,9 +8829,11 @@ F:	drivers/misc/phantom.c
 F:	include/uapi/linux/phantom.h
 
 SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
-M:	Jayamohan Kallickal <jayamohan.kallickal@emulex.com>
+M:	Jayamohan Kallickal <jayamohan.kallickal@avagotech.com>
+M:	Minh Tran <minh.tran@avagotech.com>
+M:	John Soni Jose <sony.john-n@avagotech.com>
 L:	linux-scsi@vger.kernel.org
-W:	http://www.emulex.com
+W:	http://www.avagotech.com
 S:	Supported
 F:	drivers/scsi/be2iscsi/
 
@@ -10569,6 +10571,15 @@ F:	drivers/block/virtio_blk.c
 F:	include/linux/virtio_*.h
 F:	include/uapi/linux/virtio_*.h
 
+VIRTIO GPU DRIVER
+M:	David Airlie <airlied@linux.ie>
+M:	Gerd Hoffmann <kraxel@redhat.com>
+L:	dri-devel@lists.freedesktop.org
+L:	virtualization@lists.linux-foundation.org
+S:	Maintained
+F:	drivers/gpu/drm/virtio/
+F:	include/uapi/linux/virtio_gpu.h
+
 VIRTIO HOST (VHOST)
 M:	"Michael S. Tsirkin" <mst@redhat.com>
 L:	kvm@vger.kernel.org

+ 1 - 1
Makefile

@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc6
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*

+ 10 - 6
arch/alpha/boot/Makefile

@@ -14,6 +14,9 @@ targets		:= vmlinux.gz vmlinux \
 		   tools/bootpzh bootloader bootpheader bootpzheader 
 OBJSTRIP	:= $(obj)/tools/objstrip
 
+HOSTCFLAGS	:= -Wall -I$(objtree)/usr/include
+BOOTCFLAGS	+= -I$(obj) -I$(srctree)/$(obj)
+
 # SRM bootable image.  Copy to offset 512 of a partition.
 $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
 	( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ 
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
 $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
 	$(call if_changed,objstrip)
 
-LDFLAGS_bootloader   := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpheader  := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T  #-N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootpheader  := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
 
-OBJ_bootlx   := $(obj)/head.o $(obj)/main.o
-OBJ_bootph   := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh  := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx   := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph   := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh  := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
 
 $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
 	$(call if_changed,ld)

+ 0 - 1
arch/alpha/boot/main.c

@@ -19,7 +19,6 @@
 
 #include "ksize.h"
 
-extern int vsprintf(char *, const char *, va_list);
 extern unsigned long switch_to_osf_pal(unsigned long nr,
 	struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
 	unsigned long *vptb);

+ 306 - 0
arch/alpha/boot/stdio.c

@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+	const char *sc;
+
+	for (sc = s; count-- && *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
+
+# define do_div(n, base) ({						\
+	unsigned int __base = (base);					\
+	unsigned int __rem;						\
+	__rem = ((unsigned long long)(n)) % __base;			\
+	(n) = ((unsigned long long)(n)) / __base;			\
+	__rem;								\
+})
+
+
+static int skip_atoi(const char **s)
+{
+	int i, c;
+
+	for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+		i = i*10 + c - '0';
+	return i;
+}
+
+#define ZEROPAD	1		/* pad with zero */
+#define SIGN	2		/* unsigned/signed long */
+#define PLUS	4		/* show plus */
+#define SPACE	8		/* space if plus */
+#define LEFT	16		/* left justified */
+#define SPECIAL	32		/* 0x */
+#define LARGE	64		/* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+	char c,sign,tmp[66];
+	const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+	int i;
+
+	if (type & LARGE)
+		digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+	if (type & LEFT)
+		type &= ~ZEROPAD;
+	if (base < 2 || base > 36)
+		return 0;
+	c = (type & ZEROPAD) ? '0' : ' ';
+	sign = 0;
+	if (type & SIGN) {
+		if ((signed long long)num < 0) {
+			sign = '-';
+			num = - (signed long long)num;
+			size--;
+		} else if (type & PLUS) {
+			sign = '+';
+			size--;
+		} else if (type & SPACE) {
+			sign = ' ';
+			size--;
+		}
+	}
+	if (type & SPECIAL) {
+		if (base == 16)
+			size -= 2;
+		else if (base == 8)
+			size--;
+	}
+	i = 0;
+	if (num == 0)
+		tmp[i++]='0';
+	else while (num != 0) {
+		tmp[i++] = digits[do_div(num, base)];
+	}
+	if (i > precision)
+		precision = i;
+	size -= precision;
+	if (!(type&(ZEROPAD+LEFT)))
+		while(size-->0)
+			*str++ = ' ';
+	if (sign)
+		*str++ = sign;
+	if (type & SPECIAL) {
+		if (base==8)
+			*str++ = '0';
+		else if (base==16) {
+			*str++ = '0';
+			*str++ = digits[33];
+		}
+	}
+	if (!(type & LEFT))
+		while (size-- > 0)
+			*str++ = c;
+	while (i < precision--)
+		*str++ = '0';
+	while (i-- > 0)
+		*str++ = tmp[i];
+	while (size-- > 0)
+		*str++ = ' ';
+	return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+	int len;
+	unsigned long long num;
+	int i, base;
+	char * str;
+	const char *s;
+
+	int flags;		/* flags to number() */
+
+	int field_width;	/* width of output field */
+	int precision;		/* min. # of digits for integers; max
+				   number of chars for from string */
+	int qualifier;		/* 'h', 'l', or 'L' for integer fields */
+	                        /* 'z' support added 23/7/1999 S.H.    */
+				/* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+	for (str=buf ; *fmt ; ++fmt) {
+		if (*fmt != '%') {
+			*str++ = *fmt;
+			continue;
+		}
+
+		/* process flags */
+		flags = 0;
+		repeat:
+			++fmt;		/* this also skips first '%' */
+			switch (*fmt) {
+				case '-': flags |= LEFT; goto repeat;
+				case '+': flags |= PLUS; goto repeat;
+				case ' ': flags |= SPACE; goto repeat;
+				case '#': flags |= SPECIAL; goto repeat;
+				case '0': flags |= ZEROPAD; goto repeat;
+				}
+
+		/* get field width */
+		field_width = -1;
+		if ('0' <= *fmt && *fmt <= '9')
+			field_width = skip_atoi(&fmt);
+		else if (*fmt == '*') {
+			++fmt;
+			/* it's the next argument */
+			field_width = va_arg(args, int);
+			if (field_width < 0) {
+				field_width = -field_width;
+				flags |= LEFT;
+			}
+		}
+
+		/* get the precision */
+		precision = -1;
+		if (*fmt == '.') {
+			++fmt;
+			if ('0' <= *fmt && *fmt <= '9')
+				precision = skip_atoi(&fmt);
+			else if (*fmt == '*') {
+				++fmt;
+				/* it's the next argument */
+				precision = va_arg(args, int);
+			}
+			if (precision < 0)
+				precision = 0;
+		}
+
+		/* get the conversion qualifier */
+		qualifier = -1;
+		if (*fmt == 'l' && *(fmt + 1) == 'l') {
+			qualifier = 'q';
+			fmt += 2;
+		} else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+			|| *fmt == 'Z') {
+			qualifier = *fmt;
+			++fmt;
+		}
+
+		/* default base */
+		base = 10;
+
+		switch (*fmt) {
+		case 'c':
+			if (!(flags & LEFT))
+				while (--field_width > 0)
+					*str++ = ' ';
+			*str++ = (unsigned char) va_arg(args, int);
+			while (--field_width > 0)
+				*str++ = ' ';
+			continue;
+
+		case 's':
+			s = va_arg(args, char *);
+			if (!s)
+				s = "<NULL>";
+
+			len = strnlen(s, precision);
+
+			if (!(flags & LEFT))
+				while (len < field_width--)
+					*str++ = ' ';
+			for (i = 0; i < len; ++i)
+				*str++ = *s++;
+			while (len < field_width--)
+				*str++ = ' ';
+			continue;
+
+		case 'p':
+			if (field_width == -1) {
+				field_width = 2*sizeof(void *);
+				flags |= ZEROPAD;
+			}
+			str = number(str,
+				(unsigned long) va_arg(args, void *), 16,
+				field_width, precision, flags);
+			continue;
+
+
+		case 'n':
+			if (qualifier == 'l') {
+				long * ip = va_arg(args, long *);
+				*ip = (str - buf);
+			} else if (qualifier == 'Z') {
+				size_t * ip = va_arg(args, size_t *);
+				*ip = (str - buf);
+			} else {
+				int * ip = va_arg(args, int *);
+				*ip = (str - buf);
+			}
+			continue;
+
+		case '%':
+			*str++ = '%';
+			continue;
+
+		/* integer number formats - set up the flags and "break" */
+		case 'o':
+			base = 8;
+			break;
+
+		case 'X':
+			flags |= LARGE;
+		case 'x':
+			base = 16;
+			break;
+
+		case 'd':
+		case 'i':
+			flags |= SIGN;
+		case 'u':
+			break;
+
+		default:
+			*str++ = '%';
+			if (*fmt)
+				*str++ = *fmt;
+			else
+				--fmt;
+			continue;
+		}
+		if (qualifier == 'l') {
+			num = va_arg(args, unsigned long);
+			if (flags & SIGN)
+				num = (signed long) num;
+		} else if (qualifier == 'q') {
+			num = va_arg(args, unsigned long long);
+			if (flags & SIGN)
+				num = (signed long long) num;
+		} else if (qualifier == 'Z') {
+			num = va_arg(args, size_t);
+		} else if (qualifier == 'h') {
+			num = (unsigned short) va_arg(args, int);
+			if (flags & SIGN)
+				num = (signed short) num;
+		} else {
+			num = va_arg(args, unsigned int);
+			if (flags & SIGN)
+				num = (signed int) num;
+		}
+		str = number(str, num, base, field_width, precision, flags);
+	}
+	*str = '\0';
+	return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+	va_list args;
+	int i;
+
+	va_start(args, fmt);
+	i=vsprintf(buf,fmt,args);
+	va_end(args);
+	return i;
+}

+ 3 - 0
arch/alpha/boot/tools/objstrip.c

@@ -27,6 +27,9 @@
 #include <linux/param.h>
 #ifdef __ELF__
 # include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
 #endif
 
 /* bootfile size must be multiple of BLOCK_SIZE: */

+ 0 - 1
arch/alpha/include/asm/types.h

@@ -2,6 +2,5 @@
 #define _ALPHA_TYPES_H
 
 #include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
 
 #endif /* _ALPHA_TYPES_H */

+ 1 - 1
arch/alpha/include/asm/unistd.h

@@ -3,7 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define NR_SYSCALLS			511
+#define NR_SYSCALLS			514
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64

+ 3 - 0
arch/alpha/include/uapi/asm/unistd.h

@@ -472,5 +472,8 @@
 #define __NR_sched_setattr		508
 #define __NR_sched_getattr		509
 #define __NR_renameat2			510
+#define __NR_getrandom			511
+#define __NR_memfd_create		512
+#define __NR_execveat			513
 
 #endif /* _UAPI_ALPHA_UNISTD_H */

+ 0 - 1
arch/alpha/kernel/err_ev6.c

@@ -6,7 +6,6 @@
  *	Error handling code supporting Alpha systems
  */
 
-#include <linux/init.h>
 #include <linux/sched.h>
 
 #include <asm/io.h>

+ 0 - 1
arch/alpha/kernel/irq.c

@@ -19,7 +19,6 @@
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>

+ 1 - 2
arch/alpha/kernel/osf_sys.c

@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
  	if (tv) {
 		if (get_tv32((struct timeval *)&kts, tv))
 			return -EFAULT;
+		kts.tv_nsec *= 1000;
 	}
 	if (tz) {
 		if (copy_from_user(&ktz, tz, sizeof(*tz)))
 			return -EFAULT;
 	}
 
-	kts.tv_nsec *= 1000;
-
 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 

+ 3 - 4
arch/alpha/kernel/process.c

@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
 }
 
 /*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
  */
-
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long arg,
+	    unsigned long kthread_arg,
 	    struct task_struct *p)
 {
 	extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 			sizeof(struct switch_stack) + sizeof(struct pt_regs));
 		childstack->r26 = (unsigned long) ret_from_kernel_thread;
 		childstack->r9 = usp;	/* function */
-		childstack->r10 = arg;
+		childstack->r10 = kthread_arg;
 		childregs->hae = alpha_mv.hae_cache,
 		childti->pcb.usp = 0;
 		return 0;

+ 1 - 7
arch/alpha/kernel/smp.c

@@ -63,7 +63,6 @@ static struct {
 enum ipi_message_type {
 	IPI_RESCHEDULE,
 	IPI_CALL_FUNC,
-	IPI_CALL_FUNC_SINGLE,
 	IPI_CPU_STOP,
 };
 
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
 	return -EINVAL;
 }
 
-
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
 			generic_smp_call_function_interrupt();
 			break;
 
-		case IPI_CALL_FUNC_SINGLE:
-			generic_smp_call_function_single_interrupt();
-			break;
-
 		case IPI_CPU_STOP:
 			halt();
 
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 static void

+ 1 - 2
arch/alpha/kernel/srmcons.c

@@ -237,8 +237,7 @@ srmcons_init(void)
 
 	return -ENODEV;
 }
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
 
 
 /*

+ 1 - 1
arch/alpha/kernel/sys_marvel.c

@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
 	irq = intline;
 
-	msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_loc = dev->msi_cap;
 	msg_ctl = 0;
 	if (msi_loc) 
 		pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);

+ 3 - 0
arch/alpha/kernel/systbls.S

@@ -529,6 +529,9 @@ sys_call_table:
 	.quad sys_sched_setattr
 	.quad sys_sched_getattr
 	.quad sys_renameat2			/* 510 */
+	.quad sys_getrandom
+	.quad sys_memfd_create
+	.quad sys_execveat
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
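With the uapi numbers above wired into the table, userspace can reach the new entry points directly; a hypothetical smoke test for getrandom on alpha (the 511 is copied from the __NR_getrandom define above, everything else is illustrative):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    int main(void)
    {
    	unsigned char buf[16];
    	/* 511 == __NR_getrandom on alpha; with updated headers,
    	 * SYS_getrandom would be used instead of the raw number */
    	long n = syscall(511, buf, sizeof(buf), 0);

    	printf("getrandom returned %ld\n", n);
    	return n == (long)sizeof(buf) ? 0 : 1;
    }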

+ 0 - 1
arch/alpha/kernel/traps.c

@@ -14,7 +14,6 @@
 #include <linux/tty.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/ratelimit.h>
 

+ 0 - 1
arch/alpha/oprofile/op_model_ev4.c

@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 

+ 0 - 1
arch/alpha/oprofile/op_model_ev5.c

@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 

+ 0 - 1
arch/alpha/oprofile/op_model_ev6.c

@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 

+ 0 - 1
arch/alpha/oprofile/op_model_ev67.c

@@ -9,7 +9,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 

+ 1 - 1
arch/arm/boot/dts/Makefile

@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
 	imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
 	imx25-karo-tx25.dtb \
 	imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
 	imx27-apf27.dtb \
 	imx27-apf27dev.dtb \
 	imx27-eukrea-mbimxsd27-baseboard.dtb \

+ 0 - 4
arch/arm/boot/dts/am335x-boneblack.dts

@@ -80,7 +80,3 @@
 		status = "okay";
 	};
 };
-
-&rtc {
-	system-power-controller;
-};

+ 1 - 1
arch/arm/boot/dts/am335x-evmsk.dts

@@ -654,7 +654,7 @@
 	wlcore: wlcore@2 {
 		compatible = "ti,wl1271";
 		reg = <2>;
-		interrupt-parent = <&gpio1>;
+		interrupt-parent = <&gpio0>;
 		interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
 		ref-clock-frequency = <38400000>;
 	};

+ 1 - 1
arch/arm/boot/dts/exynos4412-trats2.dts

@@ -736,7 +736,7 @@
 
 			display-timings {
 				timing-0 {
-					clock-frequency = <0>;
+					clock-frequency = <57153600>;
 					hactive = <720>;
 					vactive = <1280>;
 					hfront-porch = <5>;

+ 1 - 1
arch/arm/boot/dts/imx27.dtsi

@@ -533,7 +533,7 @@
 
 			fec: ethernet@1002b000 {
 				compatible = "fsl,imx27-fec";
-				reg = <0x1002b000 0x4000>;
+				reg = <0x1002b000 0x1000>;
 				interrupts = <50>;
 				clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
 					 <&clks IMX27_CLK_FEC_AHB_GATE>;

+ 2 - 0
arch/arm/boot/dts/omap3-devkit8000.dts

@@ -110,6 +110,8 @@
 	nand@0,0 {
 		reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
 		nand-bus-width = <16>;
+		gpmc,device-width = <2>;
+		ti,nand-ecc-opt = "sw";
 
 		gpmc,sync-clk-ps = <0>;
 		gpmc,cs-on-ns = <0>;

+ 2 - 2
arch/arm/boot/dts/zynq-7000.dtsi

@@ -193,7 +193,7 @@
 		};
 
 		gem0: ethernet@e000b000 {
-			compatible = "cdns,gem";
+			compatible = "cdns,zynq-gem";
 			reg = <0xe000b000 0x1000>;
 			status = "disabled";
 			interrupts = <0 22 4>;
@@ -204,7 +204,7 @@
 		};
 
 		gem1: ethernet@e000c000 {
-			compatible = "cdns,gem";
+			compatible = "cdns,zynq-gem";
 			reg = <0xe000c000 0x1000>;
 			status = "disabled";
 			interrupts = <0 45 4>;

+ 1 - 1
arch/arm/configs/multi_v7_defconfig

@@ -429,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_STI=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_STI=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y

+ 3 - 1
arch/arm/kernel/entry-common.S

@@ -33,7 +33,9 @@ ret_fast_syscall:
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)
 	disable_irq				@ disable interrupts
-	ldr	r1, [tsk, #TI_FLAGS]
+	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
+	tst	r1, #_TIF_SYSCALL_WORK
+	bne	__sys_trace_return
 	tst	r1, #_TIF_WORK_MASK
 	bne	fast_work_pending
 	asm_trace_hardirqs_on

+ 5 - 4
arch/arm/kernel/perf_event_cpu.c

@@ -304,16 +304,17 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int of_pmu_irq_cfg(struct platform_device *pdev)
 {
 	int i, irq;
-	int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-
-	if (!irqs)
-		return -ENOMEM;
+	int *irqs;
 
 	/* Don't bother with PPIs; they're already affine */
 	irq = platform_get_irq(pdev, 0);
 	if (irq >= 0 && irq_is_percpu(irq))
 		return 0;
 
+	irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
 	for (i = 0; i < pdev->num_resources; ++i) {
 		struct device_node *dn;
 		int cpu;

+ 13 - 3
arch/arm/mach-imx/gpc.c

@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
 	struct device_node *np;
 
 	np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
-	if (WARN_ON(!np ||
-		    !of_find_property(np, "interrupt-controller", NULL)))
-		pr_warn("Outdated DT detected, system is about to crash!!!\n");
+	if (WARN_ON(!np))
+		return;
+
+	if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+		pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+		/* map GPC, so that at least CPUidle and WARs keep working */
+		gpc_base = of_iomap(np, 0);
+	}
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
 	struct regulator *pu_reg;
 	int ret;
 
+	/* bail out if DT too old and doesn't provide the necessary info */
+	if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+		return 0;
+
 	pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
 	if (PTR_ERR(pu_reg) == -ENODEV)
 		pu_reg = NULL;

+ 1 - 1
arch/arm/mach-pxa/pxa_cplds_irqs.c

@@ -107,7 +107,7 @@ static int cplds_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct cplds *fpga;
 	int ret;
-	unsigned int base_irq = 0;
+	int base_irq;
 	unsigned long irqflags = 0;
 
 	fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);

+ 10 - 10
arch/arm/mm/mmu.c

@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
 			}
 
 			/*
-			 * Find the first non-section-aligned page, and point
+			 * Find the first non-pmd-aligned page, and point
 			 * memblock_limit at it. This relies on rounding the
-			 * limit down to be section-aligned, which happens at
-			 * the end of this function.
+			 * limit down to be pmd-aligned, which happens at the
+			 * end of this function.
 			 *
 			 * With this algorithm, the start or end of almost any
-			 * bank can be non-section-aligned. The only exception
-			 * is that the start of the bank 0 must be section-
+			 * bank can be non-pmd-aligned. The only exception is
+			 * that the start of the bank 0 must be section-
 			 * aligned, since otherwise memory would need to be
 			 * allocated when mapping the start of bank 0, which
 			 * occurs before any free memory is mapped.
 			 */
 			if (!memblock_limit) {
-				if (!IS_ALIGNED(block_start, SECTION_SIZE))
+				if (!IS_ALIGNED(block_start, PMD_SIZE))
 					memblock_limit = block_start;
-				else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+				else if (!IS_ALIGNED(block_end, PMD_SIZE))
 					memblock_limit = arm_lowmem_limit;
 			}
 
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
 
 	/*
-	 * Round the memblock limit down to a section size.  This
+	 * Round the memblock limit down to a pmd size.  This
 	 * helps to ensure that we will allocate memory from the
-	 * last full section, which should be mapped.
+	 * last full pmd, which should be mapped.
 	 */
 	if (memblock_limit)
-		memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+		memblock_limit = round_down(memblock_limit, PMD_SIZE);
 	if (!memblock_limit)
 		memblock_limit = arm_lowmem_limit;
 

+ 1 - 0
arch/arm/xen/enlighten.c

@@ -272,6 +272,7 @@ void xen_arch_pre_suspend(void) { }
 void xen_arch_post_suspend(int suspend_cancelled) { }
 void xen_timer_resume(void) { }
 void xen_arch_resume(void) { }
+void xen_arch_suspend(void) { }
 
 
 /* In the hypervisor.S file. */

+ 10 - 3
arch/ia64/pci/pci.c

@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	struct pci_controller *controller = bridge->bus->sysdata;
-
-	ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+	/*
+	 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+	 * here, pci_create_root_bus() has been called by someone else and
+	 * sysdata is likely to be different from what we expect.  Let it go in
+	 * that case.
+	 */
+	if (!bridge->dev.parent) {
+		struct pci_controller *controller = bridge->bus->sysdata;
+		ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+	}
 	return 0;
 }
 

+ 3 - 0
arch/mips/ath79/prom.c

@@ -1,6 +1,7 @@
 /*
  *  Atheros AR71XX/AR724X/AR913X specific prom routines
  *
+ *  Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
  *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
  *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  *
@@ -25,12 +26,14 @@ void __init prom_init(void)
 {
 	fw_init_cmdline();
 
+#ifdef CONFIG_BLK_DEV_INITRD
 	/* Read the initrd address from the firmware environment */
 	initrd_start = fw_getenvl("initrd_start");
 	if (initrd_start) {
 		initrd_start = KSEG0ADDR(initrd_start);
 		initrd_end = initrd_start + fw_getenvl("initrd_size");
 	}
+#endif
 }
 
 void __init prom_free_prom_memory(void)

+ 1 - 1
arch/mips/configs/fuloong2e_defconfig

@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
 CONFIG_USB_C67X00_HCD=m
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ISP1760_HCD=m
+CONFIG_USB_ISP1760=m
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=m
 CONFIG_USB_R8A66597_HCD=m

+ 1 - 1
arch/mips/kernel/irq.c

@@ -29,7 +29,7 @@
 int kgdb_early_setup;
 #endif
 
-static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+static DECLARE_BITMAP(irq_map, NR_IRQS);
 
 int allocate_irqno(void)
 {
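The point of the change is rounding: NR_IRQS / BITS_PER_LONG truncates whenever NR_IRQS is not a multiple of the word size, silently losing the last partial word, while DECLARE_BITMAP sizes the array via BITS_TO_LONGS(), which rounds up. A standalone illustration (the helpers are restated from the kernel headers, and 200 is a made-up NR_IRQS):

    #include <stdio.h>

    #define BITS_PER_LONG      (8 * sizeof(long))
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
    #define BITS_TO_LONGS(nr)  DIV_ROUND_UP(nr, BITS_PER_LONG)

    int main(void)
    {
    	size_t nr_irqs = 200;	/* hypothetical NR_IRQS, not a multiple of 64 */

    	/* old sizing loses the last partial word; new sizing keeps it */
    	printf("truncating size: %zu longs (%zu bits covered)\n",
    	       nr_irqs / BITS_PER_LONG,
    	       (nr_irqs / BITS_PER_LONG) * BITS_PER_LONG);
    	printf("rounded-up size: %zu longs (%zu bits covered)\n",
    	       BITS_TO_LONGS(nr_irqs),
    	       BITS_TO_LONGS(nr_irqs) * BITS_PER_LONG);
    	return 0;
    }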

+ 1 - 1
arch/mips/kernel/smp-bmips.c

@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
 static void bmips_wr_vec(unsigned long dst, char *start, char *end)
 {
 	memcpy((void *)dst, start, end - start);
-	dma_cache_wback((unsigned long)start, end - start);
+	dma_cache_wback(dst, end - start);
 	local_flush_icache_range(dst, dst + (end - start));
 	instruction_hazard();
 }

+ 13 - 2
arch/mips/lib/strnlen_user.S

@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
 FEXPORT(__strnlen_\func\()_nocheck_asm)
 	move		v0, a0
 	PTR_ADDU	a1, a0			# stop pointer
-1:	beq		v0, a1, 1f		# limit reached?
+1:
+#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
+	.set		noat
+	li		AT, 1
+#endif
+	beq		v0, a1, 1f		# limit reached?
 .ifeqs "\func", "kernel"
 	EX(lb, t0, (v0), .Lfault\@)
 .else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
 .endif
 	.set		noreorder
 	bnez		t0, 1b
-1:	 PTR_ADDIU	v0, 1
+1:
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+	 PTR_ADDIU	v0, 1
+#else
+	 PTR_ADDU	v0, AT
+	.set		at
+#endif
 	.set		reorder
 	PTR_SUBU	v0, a0
 	jr		ra

+ 2 - 2
arch/powerpc/kernel/mce.c

@@ -73,7 +73,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
 		    uint64_t nip, uint64_t addr)
 {
 	uint64_t srr1;
-	int index = __this_cpu_inc_return(mce_nest_count);
+	int index = __this_cpu_inc_return(mce_nest_count) - 1;
 	struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
 
 	/*
@@ -184,7 +184,7 @@ void machine_check_queue_event(void)
 	if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
 		return;
 
-	index = __this_cpu_inc_return(mce_queue_count);
+	index = __this_cpu_inc_return(mce_queue_count) - 1;
 	/* If queue is full, just return for now. */
 	if (index >= MAX_MC_EVT) {
 		__this_cpu_dec(mce_queue_count);
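The subtlety both hunks fix: __this_cpu_inc_return() returns the post-increment value, so using it directly as an array index skips slot 0 and can step one past the end of the array. A toy model of the before/after indexing (names are illustrative):

    #include <stdio.h>

    static int nest_count;	/* stands in for the per-cpu mce_nest_count */

    /* behaves like __this_cpu_inc_return(): returns the incremented value */
    static int inc_return(void)
    {
    	return ++nest_count;
    }

    int main(void)
    {
    	int r = inc_return();	/* first event: returns 1 */

    	printf("first event: old index %d (skips slot 0), fixed index %d\n",
    	       r, r - 1);
    	return 0;
    }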

+ 1 - 0
arch/powerpc/kernel/vmlinux.lds.S

@@ -213,6 +213,7 @@ SECTIONS
 		*(.opd)
 	}
 
+	. = ALIGN(256);
 	.got : AT(ADDR(.got) - LOAD_OFFSET) {
 		__toc_start = .;
 #ifndef CONFIG_RELOCATABLE

+ 3 - 2
arch/powerpc/kvm/book3s_hv.c

@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
  */
 static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu *vcpu, *vnext;
 	int i;
 	int srcu_idx;
 
@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	if ((threads_per_core > 1) &&
 	    ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+					 arch.run_list) {
 			vcpu->arch.ret = -EBUSY;
 			kvmppc_remove_runnable(vc, vcpu);
 			wake_up(&vcpu->arch.cpu_run);

+ 16 - 9
arch/powerpc/mm/hugetlbpage.c

@@ -689,27 +689,34 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
-	pte_t *ptep;
-	struct page *page;
+	pte_t *ptep, pte;
 	unsigned shift;
 	unsigned long mask, flags;
+	struct page *page = ERR_PTR(-EINVAL);
+
+	local_irq_save(flags);
+	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+	if (!ptep)
+		goto no_page;
+	pte = READ_ONCE(*ptep);
 	/*
+	 * Verify it is a huge page else bail.
 	 * Transparent hugepages are handled by generic code. We can skip them
 	 * here.
 	 */
-	local_irq_save(flags);
-	ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
+	if (!shift || pmd_trans_huge(__pmd(pte_val(pte))))
+		goto no_page;
 
-	/* Verify it is a huge page else bail. */
-	if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
-		local_irq_restore(flags);
-		return ERR_PTR(-EINVAL);
+	if (!pte_present(pte)) {
+		page = NULL;
+		goto no_page;
 	}
 	mask = (1UL << shift) - 1;
-	page = pte_page(*ptep);
+	page = pte_page(pte);
 	if (page)
 		page += (address & mask) / PAGE_SIZE;
 
+no_page:
 	local_irq_restore(flags);
 	return page;
 }

+ 11 - 0
arch/powerpc/mm/pgtable_64.c

@@ -839,6 +839,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 	 * hash fault look at them.
 	 */
 	memset(pgtable, 0, PTE_FRAG_SIZE);
+	/*
+	 * Serialize against find_linux_pte_or_hugepte which does lock-less
+	 * lookup in page tables with local interrupts disabled. For huge pages
+	 * it casts pmd_t to pte_t. Since format of pte_t is different from
+	 * pmd_t we want to prevent transit from pmd pointing to page table
+	 * to pmd pointing to huge page (and back) while interrupts are disabled.
+	 * We clear pmd to possibly replace it with page table pointer in
+	 * different code paths. So make sure we wait for the parallel
+	 * find_linux_pte_or_hugepage to finish.
+	 */
+	kick_all_cpus_sync();
 	return old_pmd;
 }
 
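The lockless reader this barrier serializes against is the follow_huge_addr() rework earlier in this commit; schematically, the reader side of the protocol looks like this (an excerpt-style sketch, not a complete function):

    /* interrupts off, one READ_ONCE() snapshot, no locks taken;
     * see the hugetlbpage.c hunk above for the real code */
    local_irq_save(flags);
    ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
    if (ptep)
    	pte = READ_ONCE(*ptep);	/* may be a pmd_t reinterpreted as pte_t */
    local_irq_restore(flags);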

+ 13 - 12
arch/s390/crypto/ghash_s390.c

@@ -16,11 +16,12 @@
 #define GHASH_DIGEST_SIZE	16
 
 struct ghash_ctx {
-	u8 icv[16];
-	u8 key[16];
+	u8 key[GHASH_BLOCK_SIZE];
 };
 
 struct ghash_desc_ctx {
+	u8 icv[GHASH_BLOCK_SIZE];
+	u8 key[GHASH_BLOCK_SIZE];
 	u8 buffer[GHASH_BLOCK_SIZE];
 	u32 bytes;
 };
@@ -28,8 +29,10 @@ struct ghash_desc_ctx {
 static int ghash_init(struct shash_desc *desc)
 {
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
+	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 
 	memset(dctx, 0, sizeof(*dctx));
+	memcpy(dctx->key, ctx->key, GHASH_BLOCK_SIZE);
 
 	return 0;
 }
@@ -45,7 +48,6 @@ static int ghash_setkey(struct crypto_shash *tfm,
 	}
 
 	memcpy(ctx->key, key, GHASH_BLOCK_SIZE);
-	memset(ctx->icv, 0, GHASH_BLOCK_SIZE);
 
 	return 0;
 }
@@ -54,7 +56,6 @@ static int ghash_update(struct shash_desc *desc,
 			 const u8 *src, unsigned int srclen)
 {
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	unsigned int n;
 	u8 *buf = dctx->buffer;
 	int ret;
@@ -70,7 +71,7 @@ static int ghash_update(struct shash_desc *desc,
 		src += n;
 
 		if (!dctx->bytes) {
-			ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf,
+			ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf,
 					      GHASH_BLOCK_SIZE);
 			if (ret != GHASH_BLOCK_SIZE)
 				return -EIO;
@@ -79,7 +80,7 @@ static int ghash_update(struct shash_desc *desc,
 
 	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
 	if (n) {
-		ret = crypt_s390_kimd(KIMD_GHASH, ctx, src, n);
+		ret = crypt_s390_kimd(KIMD_GHASH, dctx, src, n);
 		if (ret != n)
 			return -EIO;
 		src += n;
@@ -94,7 +95,7 @@ static int ghash_update(struct shash_desc *desc,
 	return 0;
 }
 
-static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
+static int ghash_flush(struct ghash_desc_ctx *dctx)
 {
 	u8 *buf = dctx->buffer;
 	int ret;
@@ -104,24 +105,24 @@ static int ghash_flush(struct ghash_ctx *ctx, struct ghash_desc_ctx *dctx)
 
 		memset(pos, 0, dctx->bytes);
 
-		ret = crypt_s390_kimd(KIMD_GHASH, ctx, buf, GHASH_BLOCK_SIZE);
+		ret = crypt_s390_kimd(KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
 		if (ret != GHASH_BLOCK_SIZE)
 			return -EIO;
+
+		dctx->bytes = 0;
 	}
 
-	dctx->bytes = 0;
 	return 0;
 }
 
 static int ghash_final(struct shash_desc *desc, u8 *dst)
 {
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-	struct ghash_ctx *ctx = crypto_shash_ctx(desc->tfm);
 	int ret;
 
-	ret = ghash_flush(ctx, dctx);
+	ret = ghash_flush(dctx);
 	if (!ret)
-		memcpy(dst, ctx->icv, GHASH_BLOCK_SIZE);
+		memcpy(dst, dctx->icv, GHASH_BLOCK_SIZE);
 	return ret;
 }
 

+ 1 - 1
arch/s390/crypto/prng.c

@@ -125,7 +125,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
 		/* fill page with urandom bytes */
 		get_random_bytes(pg, PAGE_SIZE);
 		/* exor page with stckf values */
-		for (n = 0; n < sizeof(PAGE_SIZE/sizeof(u64)); n++) {
+		for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
 			u64 *p = ((u64 *)pg) + n;
 			*p ^= get_tod_clock_fast();
 		}
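The bug class deserves a note: sizeof(PAGE_SIZE/sizeof(u64)) measures the size of an integer expression, not its value, so the old loop XORed only 8 words instead of the whole page. A standalone demonstration (assumes a 64-bit build, as on s390x):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096

    int main(void)
    {
    	/* sizeof() of a value collapses to the type's size: 8 on 64-bit */
    	printf("buggy loop bound: %zu\n", sizeof(PAGE_SIZE / sizeof(uint64_t)));
    	/* the intended bound: number of u64 words in a page, i.e. 512 */
    	printf("fixed loop bound: %zu\n", PAGE_SIZE / sizeof(uint64_t));
    	return 0;
    }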

+ 1 - 1
arch/s390/include/asm/pgtable.h

@@ -494,7 +494,7 @@ static inline int pmd_large(pmd_t pmd)
 	return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
 }
 
-static inline int pmd_pfn(pmd_t pmd)
+static inline unsigned long pmd_pfn(pmd_t pmd)
 {
 	unsigned long origin_mask;
 

+ 10 - 9
arch/s390/net/bpf_jit_comp.c

@@ -443,8 +443,11 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
 
 /*
  * Compile one eBPF instruction into s390x code
+ *
+ * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
+ * stack space for the large switch statement.
  */
-static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
+static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	int jmp_off, last, insn_count = 1;
@@ -588,8 +591,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / (u32) src */
-	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % (u32) src */
+	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
+	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -602,10 +605,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 		EMIT4_IMM(0xa7090000, REG_W0, 0);
 		/* lgr %w1,%dst */
 		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* llgfr %dst,%src (u32 cast) */
-		EMIT4(0xb9160000, dst_reg, src_reg);
 		/* dlgr %w0,%dst */
-		EMIT4(0xb9870000, REG_W0, dst_reg);
+		EMIT4(0xb9870000, REG_W0, src_reg);
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
@@ -632,8 +633,8 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		break;
 	}
-	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / (u32) imm */
-	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % (u32) imm */
+	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
+	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
 	{
 		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
 
@@ -649,7 +650,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i)
 		EMIT4(0xb9040000, REG_W1, dst_reg);
 		/* dlg %w0,<d(imm)>(%l) */
 		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U64((u32) imm));
+			      EMIT_CONST_U64(imm));
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;

+ 3 - 0
arch/x86/include/asm/kvm_host.h

@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
 		unsigned nxe:1;
 		unsigned cr0_wp:1;
 		unsigned smep_andnot_wp:1;
+		unsigned smap_andnot_wp:1;
 	};
 };
 
@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
 	struct kvm_mmu_memory_cache mmu_page_header_cache;
 
 	struct fpu guest_fpu;
+	bool eager_fpu;
 	u64 xcr0;
 	u64 guest_supported_xcr0;
 	u32 guest_xstate_size;
@@ -743,6 +745,7 @@ struct kvm_x86_ops {
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	void (*fpu_activate)(struct kvm_vcpu *vcpu);
 	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
 
 	void (*tlb_flush)(struct kvm_vcpu *vcpu);

+ 1 - 0
arch/x86/include/uapi/asm/msr-index.h

@@ -140,6 +140,7 @@
 #define MSR_CORE_C3_RESIDENCY		0x000003fc
 #define MSR_CORE_C6_RESIDENCY		0x000003fd
 #define MSR_CORE_C7_RESIDENCY		0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY	0x000003ff
 #define MSR_PKG_C2_RESIDENCY		0x0000060d
 #define MSR_PKG_C8_RESIDENCY		0x00000630
 #define MSR_PKG_C9_RESIDENCY		0x00000631

+ 5 - 2
arch/x86/kernel/cpu/mcheck/mce.c

@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			  struct pt_regs *regs)
 {
 	int i, ret = 0;
+	char *tmp;
 
 	for (i = 0; i < mca_cfg.banks; i++) {
 		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			if (quirk_no_way_out)
 				quirk_no_way_out(i, m, regs);
 		}
-		if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
-		    MCE_PANIC_SEVERITY)
+
+		if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+			*msg = tmp;
 			ret = 1;
+		}
 	}
 	return ret;
 }

+ 15 - 0
arch/x86/kernel/i387.c

@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 	else
 		xstate_size = sizeof(struct i387_fsave_struct);
+
+	/*
+	 * Quirk: we don't yet handle the XSAVES* instructions
+	 * correctly, as we don't correctly convert between
+	 * standard and compacted format when interfacing
+	 * with user-space - so disable it for now.
+	 *
+	 * The difference is small: with recent CPUs the
+	 * compacted format is only marginally smaller than
+	 * the standard FPU state format.
+	 *
+	 * ( This is easy to backport while we are fixing
+	 *   XSAVES* support. )
+	 */
+	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*

+ 4 - 0
arch/x86/kvm/cpuid.c

@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu.  Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/xsave.h>
 #include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
+	vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
 	 * address checks; exit if it is ever changed.

+ 8 - 0
arch/x86/kvm/cpuid.h

@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
 	best = kvm_find_cpuid_entry(vcpu, 7, 0);
 	return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
 #endif

+ 12 - 4
arch/x86/kvm/mmu.c

@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 	}
 }
 
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
-		struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+				      struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
 	bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
 	MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 	context->base_role.cr0_wp  = is_write_protection(vcpu);
 	context->base_role.smep_andnot_wp
 		= smep && !is_write_protection(vcpu);
+	context->base_role.smap_andnot_wp
+		= smap && !is_write_protection(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		       const u8 *new, int bytes)
 {
 	gfn_t gfn = gpa >> PAGE_SHIFT;
-	union kvm_mmu_page_role mask = { .word = 0 };
 	struct kvm_mmu_page *sp;
 	LIST_HEAD(invalid_list);
 	u64 entry, gentry, *spte;
 	int npte;
 	bool remote_flush, local_flush, zap_page;
+	union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
+		.cr0_wp = 1,
+		.cr4_pae = 1,
+		.nxe = 1,
+		.smep_andnot_wp = 1,
+		.smap_andnot_wp = 1,
+	};
 
 	/*
 	 * If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	++vcpu->kvm->stat.mmu_pte_write;
 	kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-	mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
 	for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
 		if (detect_write_misaligned(sp, gpa, bytes) ||
 		      detect_write_flooding(sp)) {

+ 2 - 2
arch/x86/kvm/mmu.h

@@ -71,8 +71,6 @@ enum {
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-		bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	int index = (pfec >> 1) +
 		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
 
+	WARN_ON(pfec & PFERR_RSVD_MASK);
+
 	return (mmu->permissions[index] >> pte_access) & 1;
 }
 

+ 7 - 0
arch/x86/kvm/paging_tmpl.h

@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
 					      mmu_is_nested(vcpu));
 		if (likely(r != RET_MMIO_PF_INVALID))
 			return r;
+
+		/*
+		 * page fault with PFEC.RSVD  = 1 is caused by shadow
+		 * page fault, should not be used to walk guest page
+		 * table.
+		 */
+		error_code &= ~PFERR_RSVD_MASK;
 	};
 
 	r = mmu_topup_memory_caches(vcpu);

+ 1 - 0
arch/x86/kvm/svm.c

@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+	.fpu_activate = svm_fpu_activate,
 	.fpu_deactivate = svm_fpu_deactivate,
 
 	.tlb_flush = svm_flush_tlb,

+ 1 - 0
arch/x86/kvm/vmx.c

@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.fpu_activate = vmx_fpu_activate,
 	.fpu_deactivate = vmx_fpu_deactivate,
 
 	.tlb_flush = vmx_flush_tlb,

+ 19 - 7
arch/x86/kvm/x86.c

@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
-	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
-				   X86_CR4_PAE | X86_CR4_SMEP;
+	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+				   X86_CR4_SMEP | X86_CR4_SMAP;
+
 	if (cr4 & CR4_RESERVED_BITS)
 		return 1;
 
@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
-	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
-		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
 		kvm_update_cpuid(vcpu);
 
@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
 		return;
 
 	page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+	if (is_error_page(page))
+		return;
 	kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
 
 	/*
@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	fpu_save_init(&vcpu->arch.guest_fpu);
 	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
-	kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+	if (!vcpu->arch.eager_fpu)
+		kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
 	trace_kvm_fpu(0);
 }
 
@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 						unsigned int id)
 {
+	struct kvm_vcpu *vcpu;
+
 	if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
 		printk_once(KERN_WARNING
 		"kvm: SMP vm created on host with unstable TSC; "
 		"guest TSC will not be reliable\n");
-	return kvm_x86_ops->vcpu_create(kvm, id);
+
+	vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+	/*
+	 * Activate fpu unconditionally in case the guest needs eager FPU.  It will be
+	 * deactivated soon if it doesn't.
+	 */
+	kvm_x86_ops->fpu_activate(vcpu);
+	return vcpu;
 }
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
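
The two x86.c FPU hunks above implement the eager-FPU policy together with the new fpu_activate hooks in svm.c and vmx.c: the FPU is activated unconditionally at vCPU creation, and kvm_put_guest_fpu only schedules a deactivation when the vCPU is not marked eager. A toy model of that policy — every name here is invented for illustration, not the kvm API:

/* Toy model of the lazy-vs-eager FPU policy above; all names are
 * invented for illustration, not kvm structures. */
#include <stdbool.h>
#include <stdio.h>

struct toy_vcpu {
	bool eager_fpu;
	bool fpu_active;
	bool deactivate_requested;
};

static void toy_vcpu_create(struct toy_vcpu *v, bool eager)
{
	v->eager_fpu = eager;
	/* activate unconditionally; a lazy guest has it taken away
	 * again on the first put below */
	v->fpu_active = true;
	v->deactivate_requested = false;
}

static void toy_put_guest_fpu(struct toy_vcpu *v)
{
	if (!v->eager_fpu)
		v->deactivate_requested = true;	/* lazy: trap next use */
}

int main(void)
{
	struct toy_vcpu lazy, eager;

	toy_vcpu_create(&lazy, false);
	toy_vcpu_create(&eager, true);
	toy_put_guest_fpu(&lazy);
	toy_put_guest_fpu(&eager);
	printf("lazy deactivates: %d, eager deactivates: %d\n",
	       lazy.deactivate_requested, eager.deactivate_requested);
	return 0;
}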

+ 6 - 1
arch/x86/net/bpf_jit_comp.c

@@ -966,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
 	}
 	ctx.cleanup_addr = proglen;
 
-	for (pass = 0; pass < 10; pass++) {
+	/* The JITed image shrinks with every pass and the loop iterates
+	 * until the image stops shrinking. Very large BPF programs may
+	 * converge only on the last pass; in that case, do one more pass
+	 * to emit the final image.
+	 */
+	for (pass = 0; pass < 10 || image; pass++) {
 		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
 		if (proglen <= 0) {
 			image = NULL;
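
The changed loop bound is worth unpacking: each pass can only shrink the image, the buffer is only allocated once the size has converged, and the extra `|| image` term guarantees one final pass that actually emits into the allocated buffer even when convergence happens exactly on the tenth pass. A stripped-down model of the same converge-then-emit structure, where shrink_pass() is a stand-in for do_jit() and all the sizes are fabricated:

/* Model of the converge-then-emit JIT loop above; shrink_pass() is a
 * stand-in for do_jit() and the sizes are fabricated. */
#include <stdio.h>
#include <stdlib.h>

static int shrink_pass(int prev, char *image)
{
	int len = prev - 300;	/* pretend each pass optimizes */
	if (len < 40)
		len = 40;	/* true size of the program */
	(void)image;		/* the final pass would emit here */
	return len;
}

int main(void)
{
	char *image = NULL;
	int oldproglen = 0, proglen = 1000, pass;

	for (pass = 0; pass < 10 || image; pass++) {
		proglen = shrink_pass(proglen, image);
		if (image) {
			printf("emitted %d bytes on pass %d\n", proglen, pass);
			break;
		}
		if (proglen == oldproglen)
			image = malloc(proglen);	/* size converged */
		oldproglen = proglen;
	}
	free(image);
	return 0;
}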

+ 10 - 3
arch/x86/pci/acpi.c

@@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	struct pci_sysdata *sd = bridge->bus->sysdata;
-
-	ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+	/*
+	 * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+	 * here, pci_create_root_bus() has been called by someone else and
+	 * sysdata is likely to be different from what we expect.  Let it go in
+	 * that case.
+	 */
+	if (!bridge->dev.parent) {
+		struct pci_sysdata *sd = bridge->bus->sysdata;
+		ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+	}
 	return 0;
 }
 

+ 13 - 0
arch/xtensa/include/asm/dma-mapping.h

@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
 	return -EINVAL;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    struct dma_attrs *attrs)
+{
+	return NULL;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+				  void *vaddr, dma_addr_t dma_handle,
+				  struct dma_attrs *attrs)
+{
+}
+
 #endif	/* _XTENSA_DMA_MAPPING_H */

+ 3 - 2
block/blk-core.c

@@ -734,6 +734,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
+static void blk_queue_bio(struct request_queue *q, struct bio *bio);
+
 struct request_queue *
 blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
 			 spinlock_t *lock)
@@ -1578,7 +1580,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	blk_rq_bio_prep(req->q, req, bio);
 }
 
-void blk_queue_bio(struct request_queue *q, struct bio *bio)
+static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
 	const bool sync = !!(bio->bi_rw & REQ_SYNC);
 	struct blk_plug *plug;
@@ -1686,7 +1688,6 @@ out_unlock:
 		spin_unlock_irq(q->queue_lock);
 	}
 }
-EXPORT_SYMBOL_GPL(blk_queue_bio);	/* for device mapper only */
 
 /*
  * If bio->bi_dev is a partition, remap the location

+ 0 - 9
crypto/Kconfig

@@ -1512,15 +1512,6 @@ config CRYPTO_USER_API_RNG
 	  This option enables the user-spaces interface for random
 	  number generator algorithms.
 
-config CRYPTO_USER_API_AEAD
-	tristate "User-space interface for AEAD cipher algorithms"
-	depends on NET
-	select CRYPTO_AEAD
-	select CRYPTO_USER_API
-	help
-	  This option enables the user-spaces interface for AEAD
-	  cipher algorithms.
-
 config CRYPTO_HASH_INFO
 	bool
 

+ 4 - 5
crypto/algif_aead.c

@@ -33,7 +33,7 @@ struct aead_ctx {
 	/*
 	 * RSGL_MAX_ENTRIES is an artificial limit where user space at maximum
 	 * can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES
-	 * bytes
+	 * pages
 	 */
 #define RSGL_MAX_ENTRIES ALG_MAX_PAGES
 	struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES];
@@ -435,11 +435,10 @@ static int aead_recvmsg(struct socket *sock, struct msghdr *msg, size_t ignored,
 		if (err < 0)
 			goto unlock;
 		usedpages += err;
-		/* chain the new scatterlist with initial list */
+		/* chain the new scatterlist with previous one */
 		if (cnt)
-			scatterwalk_crypto_chain(ctx->rsgl[0].sg,
-					ctx->rsgl[cnt].sg, 1,
-					sg_nents(ctx->rsgl[cnt-1].sg));
+			af_alg_link_sg(&ctx->rsgl[cnt-1], &ctx->rsgl[cnt]);
+
 		/* we do not need more iovecs as we have sufficient memory */
 		if (outlen <= usedpages)
 			break;

+ 2 - 1
drivers/block/nvme-scsi.c

@@ -2257,7 +2257,8 @@ static int nvme_trans_inquiry(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 	page_code = GET_INQ_PAGE_CODE(cmd);
 	alloc_len = GET_INQ_ALLOC_LENGTH(cmd);
 
-	inq_response = kmalloc(alloc_len, GFP_KERNEL);
+	inq_response = kmalloc(max(alloc_len, STANDARD_INQUIRY_LENGTH),
+				GFP_KERNEL);
 	if (inq_response == NULL) {
 		res = -ENOMEM;
 		goto out_mem;
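
The kmalloc change above is a buffer-sizing fix: some INQUIRY sub-handlers write a fixed STANDARD_INQUIRY_LENGTH-byte response regardless of the (guest-controlled) allocation length taken from the CDB, so the buffer must be at least that large. The pattern in miniature — the 36-byte standard inquiry length matches SCSI, the short requested length is made up:

/* Miniature of the allocation-size fix above. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define STANDARD_INQUIRY_LENGTH 36

#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	size_t alloc_len = 4;	/* hypothetical short request from the CDB */
	/* allocating only alloc_len would overflow when the handler
	 * fills a full standard inquiry response */
	unsigned char *inq = malloc(max(alloc_len,
					(size_t)STANDARD_INQUIRY_LENGTH));
	if (!inq)
		return 1;
	memset(inq, 0, STANDARD_INQUIRY_LENGTH);	/* safe now */
	printf("allocated %zu bytes\n",
	       max(alloc_len, (size_t)STANDARD_INQUIRY_LENGTH));
	free(inq);
	return 0;
}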

+ 4 - 0
drivers/bluetooth/ath3k.c

@@ -88,6 +88,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x04CA, 0x3007) },
 	{ USB_DEVICE(0x04CA, 0x3008) },
 	{ USB_DEVICE(0x04CA, 0x300b) },
+	{ USB_DEVICE(0x04CA, 0x300f) },
 	{ USB_DEVICE(0x04CA, 0x3010) },
 	{ USB_DEVICE(0x0930, 0x0219) },
 	{ USB_DEVICE(0x0930, 0x0220) },
@@ -104,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x0cf3, 0xe003) },
 	{ USB_DEVICE(0x0CF3, 0xE004) },
 	{ USB_DEVICE(0x0CF3, 0xE005) },
+	{ USB_DEVICE(0x0CF3, 0xE006) },
 	{ USB_DEVICE(0x13d3, 0x3362) },
 	{ USB_DEVICE(0x13d3, 0x3375) },
 	{ USB_DEVICE(0x13d3, 0x3393) },
@@ -143,6 +145,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
@@ -158,6 +161,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },

+ 3 - 0
drivers/bluetooth/btusb.c

@@ -186,6 +186,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
@@ -202,6 +203,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3362), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3375), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x13d3, 0x3393), .driver_info = BTUSB_ATH3012 },
@@ -218,6 +220,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 },
 
 	/* QCA ROME chipset */
+	{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME },
 

+ 2 - 2
drivers/bus/mips_cdmm.c

@@ -453,7 +453,7 @@ void __iomem *mips_cdmm_early_probe(unsigned int dev_type)
 
 	/* Look for a specific device type */
 	for (; drb < bus->drbs; drb += size + 1) {
-		acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+		acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
 		type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
 		if (type == dev_type)
 			return cdmm + drb * CDMM_DRB_SIZE;
@@ -500,7 +500,7 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus)
 	bus->discovered = true;
 	pr_info("cdmm%u discovery (%u blocks)\n", cpu, bus->drbs);
 	for (; drb < bus->drbs; drb += size + 1) {
-		acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+		acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
 		type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
 		size = (acsr & CDMM_ACSR_DEVSIZE) >> CDMM_ACSR_DEVSIZE_SHIFT;
 		rev  = (acsr & CDMM_ACSR_DEVREV)  >> CDMM_ACSR_DEVREV_SHIFT;
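
The readl() → __raw_readl() switch matters on big-endian MIPS: readl() assumes a little-endian device register and byte-swaps on a big-endian CPU, while __raw_readl() is a native-width load with no swap, which is what the CDMM's native-endian registers want. A user-space illustration of what the unwanted swap does to a register field — the mask and value here are hypothetical, not the real ACSR layout:

/* Illustration of why a byte swap corrupts a native-endian register
 * field; the mask/value are hypothetical, not the real CDMM ACSR. */
#include <stdint.h>
#include <stdio.h>

static uint32_t bswap32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0x0000ff00) |
	       ((v << 8) & 0x00ff0000) | (v << 24);
}

int main(void)
{
	uint32_t acsr = 0x00420001;		/* native-endian register */
	uint32_t devtype_mask = 0x00ff0000;	/* hypothetical field */

	/* __raw_readl(): native load, field extracted correctly */
	printf("raw:     devtype = 0x%02x\n",
	       (unsigned)((acsr & devtype_mask) >> 16));
	/* readl() on big-endian: the swap scrambles the field */
	printf("swapped: devtype = 0x%02x\n",
	       (unsigned)((bswap32(acsr) & devtype_mask) >> 16));
	return 0;
}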

+ 45 - 18
drivers/clk/clk-si5351.c

@@ -1128,13 +1128,6 @@ static int si5351_dt_parse(struct i2c_client *client,
 	if (!pdata)
 		return -ENOMEM;
 
-	pdata->clk_xtal = of_clk_get(np, 0);
-	if (!IS_ERR(pdata->clk_xtal))
-		clk_put(pdata->clk_xtal);
-	pdata->clk_clkin = of_clk_get(np, 1);
-	if (!IS_ERR(pdata->clk_clkin))
-		clk_put(pdata->clk_clkin);
-
 	/*
 	 * property silabs,pll-source : <num src>, [<..>]
 	 * allow to selectively set pll source
@@ -1328,8 +1321,22 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	i2c_set_clientdata(client, drvdata);
 	drvdata->client = client;
 	drvdata->variant = variant;
-	drvdata->pxtal = pdata->clk_xtal;
-	drvdata->pclkin = pdata->clk_clkin;
+	drvdata->pxtal = devm_clk_get(&client->dev, "xtal");
+	drvdata->pclkin = devm_clk_get(&client->dev, "clkin");
+
+	if (PTR_ERR(drvdata->pxtal) == -EPROBE_DEFER ||
+	    PTR_ERR(drvdata->pclkin) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	/*
+	 * Check for valid parent clock: VARIANT_A and VARIANT_B need XTAL,
+	 *   VARIANT_C can have CLKIN instead.
+	 */
+	if (IS_ERR(drvdata->pxtal) &&
+	    (drvdata->variant != SI5351_VARIANT_C || IS_ERR(drvdata->pclkin))) {
+		dev_err(&client->dev, "missing parent clock\n");
+		return -EINVAL;
+	}
 
 	drvdata->regmap = devm_regmap_init_i2c(client, &si5351_regmap_config);
 	if (IS_ERR(drvdata->regmap)) {
@@ -1393,6 +1400,11 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		}
 	}
 
+	if (!IS_ERR(drvdata->pxtal))
+		clk_prepare_enable(drvdata->pxtal);
+	if (!IS_ERR(drvdata->pclkin))
+		clk_prepare_enable(drvdata->pclkin);
+
 	/* register xtal input clock gate */
 	memset(&init, 0, sizeof(init));
 	init.name = si5351_input_names[0];
@@ -1407,7 +1419,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	clk = devm_clk_register(&client->dev, &drvdata->xtal);
 	if (IS_ERR(clk)) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		return PTR_ERR(clk);
+		ret = PTR_ERR(clk);
+		goto err_clk;
 	}
 
 	/* register clkin input clock gate */
@@ -1425,7 +1438,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		if (IS_ERR(clk)) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			return PTR_ERR(clk);
+			ret = PTR_ERR(clk);
+			goto err_clk;
 		}
 	}
 
@@ -1447,7 +1461,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	clk = devm_clk_register(&client->dev, &drvdata->pll[0].hw);
 	if (IS_ERR(clk)) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		return -EINVAL;
+		ret = PTR_ERR(clk);
+		goto err_clk;
 	}
 
 	/* register PLLB or VXCO (Si5351B) */
@@ -1471,7 +1486,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 	clk = devm_clk_register(&client->dev, &drvdata->pll[1].hw);
 	if (IS_ERR(clk)) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		return -EINVAL;
+		ret = PTR_ERR(clk);
+		goto err_clk;
 	}
 
 	/* register clk multisync and clk out divider */
@@ -1492,8 +1508,10 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		num_clocks * sizeof(*drvdata->onecell.clks), GFP_KERNEL);
 
 	if (WARN_ON(!drvdata->msynth || !drvdata->clkout ||
-		    !drvdata->onecell.clks))
-		return -ENOMEM;
+		    !drvdata->onecell.clks)) {
+		ret = -ENOMEM;
+		goto err_clk;
+	}
 
 	for (n = 0; n < num_clocks; n++) {
 		drvdata->msynth[n].num = n;
@@ -1511,7 +1529,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		if (IS_ERR(clk)) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			return -EINVAL;
+			ret = PTR_ERR(clk);
+			goto err_clk;
 		}
 	}
 
@@ -1538,7 +1557,8 @@ static int si5351_i2c_probe(struct i2c_client *client,
 		if (IS_ERR(clk)) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			return -EINVAL;
+			ret = PTR_ERR(clk);
+			goto err_clk;
 		}
 		drvdata->onecell.clks[n] = clk;
 
@@ -1557,10 +1577,17 @@ static int si5351_i2c_probe(struct i2c_client *client,
 				  &drvdata->onecell);
 	if (ret) {
 		dev_err(&client->dev, "unable to add clk provider\n");
-		return ret;
+		goto err_clk;
 	}
 
 	return 0;
+
+err_clk:
+	if (!IS_ERR(drvdata->pxtal))
+		clk_disable_unprepare(drvdata->pxtal);
+	if (!IS_ERR(drvdata->pclkin))
+		clk_disable_unprepare(drvdata->pclkin);
+	return ret;
 }
 
 static const struct i2c_device_id si5351_i2c_ids[] = {
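
The si5351 rework above does two related things: it propagates -EPROBE_DEFER when either parent clock's driver has not bound yet, and it converts bare error returns into a single err_clk unwind so the parents prepared earlier are always released. A stand-alone sketch of that probe shape — the toy_* calls are stand-ins; in the kernel these are devm_clk_get(), clk_prepare_enable() and clk_disable_unprepare():

/* Sketch of the defer-then-unwind probe pattern above; all toy_*
 * functions are stand-ins for the real clk API. */
#include <stdio.h>

#define EPROBE_DEFER 517
#define EINVAL 22

static int toy_clk_get(const char *name, int *clk)
{
	(void)name;
	*clk = 1;	/* pretend the parent clock exists */
	return 0;	/* or -EPROBE_DEFER if its driver is late */
}

static void toy_clk_enable(int clk)  { (void)clk; }
static void toy_clk_disable(int clk) { (void)clk; }

static int toy_probe(void)
{
	int xtal, clkin, ret;

	ret = toy_clk_get("xtal", &xtal);
	if (ret == -EPROBE_DEFER)
		return ret;	/* retry once the parent shows up */

	toy_clk_enable(xtal);

	ret = toy_clk_get("clkin", &clkin);
	if (ret) {
		ret = -EINVAL;
		goto err_clk;	/* single unwind point, like err_clk: */
	}
	toy_clk_enable(clkin);
	return 0;

err_clk:
	toy_clk_disable(xtal);	/* undo everything enabled so far */
	return ret;
}

int main(void)
{
	printf("probe: %d\n", toy_probe());
	return 0;
}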

+ 8 - 0
drivers/clk/clk.c

@@ -1475,8 +1475,10 @@ static struct clk_core *__clk_set_parent_before(struct clk_core *clk,
 	 */
 	if (clk->prepare_count) {
 		clk_core_prepare(parent);
+		flags = clk_enable_lock();
 		clk_core_enable(parent);
 		clk_core_enable(clk);
+		clk_enable_unlock(flags);
 	}
 
 	/* update the clk tree topology */
@@ -1491,13 +1493,17 @@ static void __clk_set_parent_after(struct clk_core *core,
 				   struct clk_core *parent,
 				   struct clk_core *old_parent)
 {
+	unsigned long flags;
+
 	/*
 	 * Finish the migration of prepare state and undo the changes done
 	 * for preventing a race with clk_enable().
 	 */
 	if (core->prepare_count) {
+		flags = clk_enable_lock();
 		clk_core_disable(core);
 		clk_core_disable(old_parent);
+		clk_enable_unlock(flags);
 		clk_core_unprepare(old_parent);
 	}
 }
@@ -1525,8 +1531,10 @@ static int __clk_set_parent(struct clk_core *clk, struct clk_core *parent,
 		clk_enable_unlock(flags);
 
 		if (clk->prepare_count) {
+			flags = clk_enable_lock();
 			clk_core_disable(clk);
 			clk_core_disable(parent);
+			clk_enable_unlock(flags);
 			clk_core_unprepare(parent);
 		}
 		return ret;
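
The three clk.c hunks all apply the same fix: every clk_core_enable()/clk_core_disable() pair involved in a reparent now sits inside the enable lock, so a concurrent clk_enable() can never observe the half-updated enable counts. The invariant in miniature, using a pthread mutex as a model of the enable lock — the structs here are toy versions of clk_core, not framework types:

/* Model of the enable-lock pairing above; the structs are toy
 * versions of clk_core, not kernel types. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t enable_lock = PTHREAD_MUTEX_INITIALIZER;

struct toy_clk {
	const char *name;
	int enable_count;
};

static void toy_enable(struct toy_clk *c)  { c->enable_count++; }
static void toy_disable(struct toy_clk *c) { c->enable_count--; }

/* both counts move under one critical section, so a concurrent
 * enable path never sees the child on but the parent off */
static void migrate_enable(struct toy_clk *parent, struct toy_clk *child)
{
	pthread_mutex_lock(&enable_lock);
	toy_enable(parent);
	toy_enable(child);
	pthread_mutex_unlock(&enable_lock);
}

int main(void)
{
	struct toy_clk parent = { "parent", 0 }, child = { "child", 0 };

	migrate_enable(&parent, &child);
	printf("%s=%d %s=%d\n", parent.name, parent.enable_count,
	       child.name, child.enable_count);
	return 0;
}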

+ 2 - 2
drivers/clk/qcom/gcc-msm8916.c

@@ -71,8 +71,8 @@ static const char *gcc_xo_gpll0_bimc[] = {
 static const struct parent_map gcc_xo_gpll0a_gpll1_gpll2a_map[] = {
 	{ P_XO, 0 },
 	{ P_GPLL0_AUX, 3 },
-	{ P_GPLL2_AUX, 2 },
 	{ P_GPLL1, 1 },
+	{ P_GPLL2_AUX, 2 },
 };
 
 static const char *gcc_xo_gpll0a_gpll1_gpll2a[] = {
@@ -1115,7 +1115,7 @@ static struct clk_rcg2 usb_hs_system_clk_src = {
 static const struct freq_tbl ftbl_gcc_venus0_vcodec0_clk[] = {
 	F(100000000, P_GPLL0, 8, 0, 0),
 	F(160000000, P_GPLL0, 5, 0, 0),
-	F(228570000, P_GPLL0, 5, 0, 0),
+	F(228570000, P_GPLL0, 3.5, 0, 0),
 	{ }
 };
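
Both msm8916 fixes are consistency repairs. The parent_map reorder makes the entries line up with the gcc_xo_gpll0a_gpll1_gpll2a name table directly above it. The frequency-table change is plain arithmetic: assuming the usual 800 MHz GPLL0 on this SoC, a pre-divider of 5 gives 800 / 5 = 160 MHz — already the table's 160 MHz row — while 800 / 3.5 ≈ 228.571 MHz matches the advertised 228570000 rate. Half-integer pre-dividers are representable because the RCG F() macro encodes them as 2*h - 1 internally, so 3.5 becomes a register value of 6.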
 

+ 1 - 1
drivers/clk/samsung/Makefile

@@ -10,7 +10,7 @@ obj-$(CONFIG_SOC_EXYNOS5250)	+= clk-exynos5250.o
 obj-$(CONFIG_SOC_EXYNOS5260)	+= clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)	+= clk-exynos5410.o
 obj-$(CONFIG_SOC_EXYNOS5420)	+= clk-exynos5420.o
-obj-$(CONFIG_ARCH_EXYNOS5433)	+= clk-exynos5433.o
+obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos5433.o
 obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-audss.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-clkout.o

+ 1 - 0
drivers/clk/samsung/clk-exynos5420.c

@@ -271,6 +271,7 @@ static const struct samsung_clk_reg_dump exynos5420_set_clksrc[] = {
 	{ .offset = SRC_MASK_PERIC0,		.value = 0x11111110, },
 	{ .offset = SRC_MASK_PERIC1,		.value = 0x11111100, },
 	{ .offset = SRC_MASK_ISP,		.value = 0x11111000, },
+	{ .offset = GATE_BUS_TOP,		.value = 0xffffffff, },
 	{ .offset = GATE_BUS_DISP1,		.value = 0xffffffff, },
 	{ .offset = GATE_IP_PERIC,		.value = 0xffffffff, },
 };

+ 6 - 6
drivers/clk/samsung/clk-exynos5433.c

@@ -748,7 +748,7 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = {
 	PLL_35XX_RATE(825000000U,  275, 4,  1),
 	PLL_35XX_RATE(800000000U,  400, 6,  1),
 	PLL_35XX_RATE(733000000U,  733, 12, 1),
-	PLL_35XX_RATE(700000000U,  360, 6,  1),
+	PLL_35XX_RATE(700000000U,  175, 3,  1),
 	PLL_35XX_RATE(667000000U,  222, 4,  1),
 	PLL_35XX_RATE(633000000U,  211, 4,  1),
 	PLL_35XX_RATE(600000000U,  500, 5,  2),
@@ -760,14 +760,14 @@ static struct samsung_pll_rate_table exynos5443_pll_rates[] = {
 	PLL_35XX_RATE(444000000U,  370, 5,  2),
 	PLL_35XX_RATE(420000000U,  350, 5,  2),
 	PLL_35XX_RATE(400000000U,  400, 6,  2),
-	PLL_35XX_RATE(350000000U,  360, 6,  2),
+	PLL_35XX_RATE(350000000U,  350, 6,  2),
 	PLL_35XX_RATE(333000000U,  222, 4,  2),
 	PLL_35XX_RATE(300000000U,  500, 5,  3),
 	PLL_35XX_RATE(266000000U,  532, 6,  3),
 	PLL_35XX_RATE(200000000U,  400, 6,  3),
 	PLL_35XX_RATE(166000000U,  332, 6,  3),
 	PLL_35XX_RATE(160000000U,  320, 6,  3),
-	PLL_35XX_RATE(133000000U,  552, 6,  4),
+	PLL_35XX_RATE(133000000U,  532, 6,  4),
 	PLL_35XX_RATE(100000000U,  400, 6,  4),
 	{ /* sentinel */ }
 };
@@ -1490,7 +1490,7 @@ static struct samsung_gate_clock mif_gate_clks[] __initdata = {
 
 	/* ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT */
 	GATE(CLK_PCLK_MONOTONIC_CNT, "pclk_monotonic_cnt", "div_aclk_mif_133",
-			ENABLE_PCLK_MIF_SECURE_RTC, 0, 0, 0),
+			ENABLE_PCLK_MIF_SECURE_MONOTONIC_CNT, 0, 0, 0),
 
 	/* ENABLE_PCLK_MIF_SECURE_RTC */
 	GATE(CLK_PCLK_RTC, "pclk_rtc", "div_aclk_mif_133",
@@ -3665,7 +3665,7 @@ static struct samsung_gate_clock apollo_gate_clks[] __initdata = {
 			ENABLE_SCLK_APOLLO, 3, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_SCLK_HPM_APOLLO, "sclk_hpm_apollo", "div_sclk_hpm_apollo",
 			ENABLE_SCLK_APOLLO, 1, CLK_IGNORE_UNUSED, 0),
-	GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo_pll",
+	GATE(CLK_SCLK_APOLLO, "sclk_apollo", "div_apollo2",
 			ENABLE_SCLK_APOLLO, 0, CLK_IGNORE_UNUSED, 0),
 };
 
@@ -3927,7 +3927,7 @@ CLK_OF_DECLARE(exynos5433_cmu_atlas, "samsung,exynos5433-cmu-atlas",
 #define ENABLE_PCLK_MSCL				0x0900
 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER0		0x0904
 #define ENABLE_PCLK_MSCL_SECURE_SMMU_M2MSCALER1		0x0908
-#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG		0x000c
+#define ENABLE_PCLK_MSCL_SECURE_SMMU_JPEG		0x090c
 #define ENABLE_SCLK_MSCL				0x0a00
 #define ENABLE_IP_MSCL0					0x0b00
 #define ENABLE_IP_MSCL1					0x0b04
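
The exynos5433 PLL-table corrections check out against the PLL35xx rate formula, rate = fin * m / (p * 2^s), assuming the usual 24 MHz crystal input: 24 * 175 / (3 * 2^1) = 700 MHz, where the old 360/6/1 pair gave 720 MHz; 24 * 350 / (6 * 2^2) = 350 MHz, where 360 gave 360 MHz; and 24 * 532 / (6 * 2^4) = 133 MHz, where the old 552 gave 138 MHz.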

+ 1 - 1
drivers/gpio/gpio-kempld.c

@@ -117,7 +117,7 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
 		= container_of(chip, struct kempld_gpio_data, chip);
 	struct kempld_device_data *pld = gpio->pld;
 
-	return kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
+	return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
 }
 
 static int kempld_gpio_pincount(struct kempld_device_data *pld)

+ 6 - 4
drivers/gpio/gpiolib.c

@@ -53,6 +53,11 @@ static DEFINE_MUTEX(gpio_lookup_lock);
 static LIST_HEAD(gpio_lookup_list);
 LIST_HEAD(gpio_chips);
 
+
+static void gpiochip_free_hogs(struct gpio_chip *chip);
+static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+
+
 static inline void desc_set_label(struct gpio_desc *d, const char *label)
 {
 	d->label = label;
@@ -297,6 +302,7 @@ int gpiochip_add(struct gpio_chip *chip)
 
 err_remove_chip:
 	acpi_gpiochip_remove(chip);
+	gpiochip_free_hogs(chip);
 	of_gpiochip_remove(chip);
 	spin_lock_irqsave(&gpio_lock, flags);
 	list_del(&chip->list);
@@ -313,10 +319,6 @@ err_free_descs:
 }
 EXPORT_SYMBOL_GPL(gpiochip_add);
 
-/* Forward-declaration */
-static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
-static void gpiochip_free_hogs(struct gpio_chip *chip);
-
 /**
  * gpiochip_remove() - unregister a gpio_chip
  * @chip: the chip to unregister

+ 2 - 0
drivers/gpu/drm/Kconfig

@@ -206,6 +206,8 @@ source "drivers/gpu/drm/qxl/Kconfig"
 
 source "drivers/gpu/drm/bochs/Kconfig"
 
+source "drivers/gpu/drm/virtio/Kconfig"
+
 source "drivers/gpu/drm/msm/Kconfig"
 
 source "drivers/gpu/drm/tegra/Kconfig"

+ 2 - 1
drivers/gpu/drm/Makefile

@@ -58,9 +58,10 @@ obj-$(CONFIG_DRM_ATMEL_HLCDC)	+= atmel-hlcdc/
 obj-$(CONFIG_DRM_RCAR_DU) += rcar-du/
 obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
 obj-$(CONFIG_DRM_OMAP)	+= omapdrm/
-obj-$(CONFIG_DRM_TILCDC)	+= tilcdc/
+obj-y			+= tilcdc/
 obj-$(CONFIG_DRM_QXL) += qxl/
 obj-$(CONFIG_DRM_BOCHS) += bochs/
+obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
 obj-$(CONFIG_DRM_MSM) += msm/
 obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-$(CONFIG_DRM_STI) += sti/

+ 2 - 1
drivers/gpu/drm/amd/amdkfd/Makefile

@@ -12,6 +12,7 @@ amdkfd-y	:= kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
 		kfd_kernel_queue_vi.o kfd_packet_manager.o \
 		kfd_process_queue_manager.o kfd_device_queue_manager.o \
 		kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
-		kfd_interrupt.o kfd_events.o cik_event_interrupt.o
+		kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+		kfd_dbgdev.o kfd_dbgmgr.o
 
 obj-$(CONFIG_HSA_AMD)	+= amdkfd.o

+ 308 - 0
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

@@ -35,6 +35,7 @@
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
+#include "kfd_dbgmgr.h"
 
 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
 static int kfd_open(struct inode *, struct file *);
@@ -432,6 +433,301 @@ out:
 	return err;
 }
 
+static int kfd_ioctl_dbg_register(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_register_args *args = data;
+	struct kfd_dev *dev;
+	struct kfd_dbgmgr *dbgmgr_ptr;
+	struct kfd_process_device *pdd;
+	bool create_ok;
+	long status = 0;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->device_info->asic_family == CHIP_CARRIZO) {
+		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(kfd_get_dbgmgr_mutex());
+	mutex_lock(&p->mutex);
+
+	/*
+	 * make sure that we have a pdd, in case this is the first queue
+	 * created for this process
+	 */
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		mutex_unlock(&p->mutex);
+		mutex_unlock(kfd_get_dbgmgr_mutex());
+		return PTR_ERR(pdd);
+	}
+
+	if (dev->dbgmgr == NULL) {
+		/* In case of a legal call, we have no dbgmgr yet */
+		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
+		if (create_ok) {
+			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
+			if (status != 0)
+				kfd_dbgmgr_destroy(dbgmgr_ptr);
+			else
+				dev->dbgmgr = dbgmgr_ptr;
+		}
+	} else {
+		pr_debug("debugger already registered\n");
+		status = -EINVAL;
+	}
+
+	mutex_unlock(&p->mutex);
+	mutex_unlock(kfd_get_dbgmgr_mutex());
+
+	return status;
+}
+
+static int kfd_ioctl_dbg_unregister(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_unregister_args *args = data;
+	struct kfd_dev *dev;
+	long status;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->device_info->asic_family == CHIP_CARRIZO) {
+		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(kfd_get_dbgmgr_mutex());
+
+	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
+	if (status == 0) {
+		kfd_dbgmgr_destroy(dev->dbgmgr);
+		dev->dbgmgr = NULL;
+	}
+
+	mutex_unlock(kfd_get_dbgmgr_mutex());
+
+	return status;
+}
+
+/*
+ * Parse and generate a variable-size data structure for address watch.
+ * The total size of the buffer and the number of watch points are limited
+ * in order to prevent kernel abuse (this has no bearing on the much smaller
+ * HW limitation, which is enforced by the dbgdev module).
+ * Note also that the watch addresses themselves are not "copied from user",
+ * since they are set into the HW in user-mode values.
+ */
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_address_watch_args *args = data;
+	struct kfd_dev *dev;
+	struct dbg_address_watch_info aw_info;
+	unsigned char *args_buff;
+	long status;
+	void __user *cmd_from_user;
+	uint64_t watch_mask_value = 0;
+	unsigned int args_idx = 0;
+
+	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->device_info->asic_family == CHIP_CARRIZO) {
+		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
+		return -EINVAL;
+	}
+
+	cmd_from_user = (void __user *) args->content_ptr;
+
+	/* Validate arguments */
+
+	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
+		(args->buf_size_in_bytes <= sizeof(*args)) ||
+		(cmd_from_user == NULL))
+		return -EINVAL;
+
+	/* this is the actual buffer to work with */
+
+	args_buff = kmalloc(args->buf_size_in_bytes -
+					sizeof(*args), GFP_KERNEL);
+	if (args_buff == NULL)
+		return -ENOMEM;
+
+	status = copy_from_user(args_buff, cmd_from_user,
+				args->buf_size_in_bytes - sizeof(*args));
+
+	if (status != 0) {
+		pr_debug("Failed to copy address watch user data\n");
+		kfree(args_buff);
+		return -EINVAL;
+	}
+
+	aw_info.process = p;
+
+	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+	args_idx += sizeof(aw_info.num_watch_points);
+
+	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
+
+	/*
+	 * set the watch address base pointer to point at the array base
+	 * within args_buff
+	 */
+	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+
+	/* skip over the addresses buffer */
+	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
+
+	if (args_idx >= args->buf_size_in_bytes) {
+		kfree(args_buff);
+		return -EINVAL;
+	}
+
+	watch_mask_value = (uint64_t) args_buff[args_idx];
+
+	if (watch_mask_value > 0) {
+		/*
+		 * There is an array of masks; set the watch mask base
+		 * pointer to point at the array base within args_buff.
+		 */
+		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+
+		/* skip over the masks buffer */
+		args_idx += sizeof(aw_info.watch_mask) *
+				aw_info.num_watch_points;
+	} else {
+		/* just the NULL mask, set to NULL and skip over it */
+		aw_info.watch_mask = NULL;
+		args_idx += sizeof(aw_info.watch_mask);
+	}
+
+	if (args_idx > args->buf_size_in_bytes) {
+		kfree(args_buff);
+		return -EINVAL;
+	}
+
+	/* Currently HSA Event is not supported for DBG */
+	aw_info.watch_event = NULL;
+
+	mutex_lock(kfd_get_dbgmgr_mutex());
+
+	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+	mutex_unlock(kfd_get_dbgmgr_mutex());
+
+	kfree(args_buff);
+
+	return status;
+}
+
+/* Parse and generate a fixed-size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_dbg_wave_control_args *args = data;
+	struct kfd_dev *dev;
+	struct dbg_wave_control_info wac_info;
+	unsigned char *args_buff;
+	uint32_t computed_buff_size;
+	long status;
+	void __user *cmd_from_user;
+	unsigned int args_idx = 0;
+
+	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
+
+	/* we use compact form, independent of the packing attribute value */
+	computed_buff_size = sizeof(*args) +
+				sizeof(wac_info.mode) +
+				sizeof(wac_info.operand) +
+				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
+				sizeof(wac_info.dbgWave_msg.MemoryVA) +
+				sizeof(wac_info.trapId);
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (dev == NULL)
+		return -EINVAL;
+
+	if (dev->device_info->asic_family == CHIP_CARRIZO) {
+		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+		return -EINVAL;
+	}
+
+	/* input size must match the computed "compact" size */
+	if (args->buf_size_in_bytes != computed_buff_size) {
+		pr_debug("size mismatch, computed : actual %u : %u\n",
+				computed_buff_size, args->buf_size_in_bytes);
+		return -EINVAL;
+	}
+
+	cmd_from_user = (void __user *) args->content_ptr;
+
+	if (cmd_from_user == NULL)
+		return -EINVAL;
+
+	/* this is the actual buffer to work with */
+
+	args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args),
+			GFP_KERNEL);
+
+	if (args_buff == NULL)
+		return -ENOMEM;
+
+	/* Now copy the entire buffer from user */
+	status = copy_from_user(args_buff, cmd_from_user,
+				args->buf_size_in_bytes - sizeof(*args));
+	if (status != 0) {
+		pr_debug("Failed to copy wave control user data\n");
+		kfree(args_buff);
+		return -EINVAL;
+	}
+
+	/* move ptr to the start of the "payload" area */
+	wac_info.process = p;
+
+	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+	args_idx += sizeof(wac_info.operand);
+
+	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+	args_idx += sizeof(wac_info.mode);
+
+	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
+	args_idx += sizeof(wac_info.trapId);
+
+	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
+					*((uint32_t *)(&args_buff[args_idx]));
+	wac_info.dbgWave_msg.MemoryVA = NULL;
+
+	mutex_lock(kfd_get_dbgmgr_mutex());
+
+	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+			wac_info.process, wac_info.operand,
+			wac_info.mode, wac_info.trapId,
+			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+
+	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+
+	pr_debug("Returned status of dbg manager is %ld\n", status);
+
+	mutex_unlock(kfd_get_dbgmgr_mutex());
+
+	kfree(args_buff);
+
+	return status;
+}
+
 static int kfd_ioctl_get_clock_counters(struct file *filep,
 				struct kfd_process *p, void *data)
 {
@@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
 			kfd_ioctl_wait_events, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+			kfd_ioctl_dbg_register, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+			kfd_ioctl_dbg_unregister, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+			kfd_ioctl_dbg_address_watch, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+			kfd_ioctl_dbg_wave_control, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
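
kfd_ioctl_dbg_address_watch above walks a variable-size user buffer with a running args_idx, re-checking the index against buf_size_in_bytes after each field it consumes. The same copy-validate-walk shape in a self-contained sketch — the record layout here is invented for illustration:

/* Self-contained sketch of the copy-validate-walk ioctl parsing
 * above; the record layout is invented for illustration. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int parse_watch_buf(const unsigned char *buf, size_t len)
{
	size_t idx = 0;
	uint32_t num_points;
	uint64_t addr;

	if (len < sizeof(num_points))
		return -1;
	memcpy(&num_points, &buf[idx], sizeof(num_points));
	idx += sizeof(num_points);

	/* bound the count before letting it size the next field */
	if (num_points == 0 || num_points > 4)
		return -1;

	/* every advance is re-checked against the buffer length */
	if (idx + (size_t)num_points * sizeof(addr) > len)
		return -1;

	for (uint32_t i = 0; i < num_points; i++) {
		memcpy(&addr, &buf[idx], sizeof(addr));
		idx += sizeof(addr);
		printf("watch %u at %#llx\n", i, (unsigned long long)addr);
	}
	return 0;
}

int main(void)
{
	/* 2 watch points, addresses zero (little-endian host assumed
	 * for the first-byte literal) */
	unsigned char buf[4 + 2 * 8] = { 2 };

	return parse_watch_buf(buf, sizeof(buf));
}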

+ 886 - 0
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c

@@ -0,0 +1,886 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_regs.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
+#include "../../radeon/cik_reg.h"
+
+static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
+{
+	BUG_ON(!dev || !dev->kfd2kgd);
+
+	dev->kfd2kgd->address_watch_disable(dev->kgd);
+}
+
+static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+				unsigned int pasid, uint64_t vmid0_address,
+				uint32_t *packet_buff, size_t size_in_bytes)
+{
+	struct pm4__release_mem *rm_packet;
+	struct pm4__indirect_buffer_pasid *ib_packet;
+	struct kfd_mem_obj *mem_obj;
+	size_t pq_packets_size_in_bytes;
+	union ULARGE_INTEGER *largep;
+	union ULARGE_INTEGER addr;
+	struct kernel_queue *kq;
+	uint64_t *rm_state;
+	unsigned int *ib_packet_buff;
+	int status;
+
+	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+
+	kq = dbgdev->kq;
+
+	pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
+				sizeof(struct pm4__indirect_buffer_pasid);
+
+	/*
+	 * We acquire a buffer from the DIQ.
+	 * The receive packet buffer will be sitting on the indirect buffer
+	 * and in the PQ we put the IB packet + sync packet(s).
+	 */
+	status = kq->ops.acquire_packet_buffer(kq,
+				pq_packets_size_in_bytes / sizeof(uint32_t),
+				&ib_packet_buff);
+	if (status != 0) {
+		pr_err("amdkfd: acquire_packet_buffer failed\n");
+		return status;
+	}
+
+	memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
+
+	ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
+
+	ib_packet->header.count = 3;
+	ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
+	ib_packet->header.type = PM4_TYPE_3;
+
+	largep = (union ULARGE_INTEGER *) &vmid0_address;
+
+	ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
+	ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
+
+	ib_packet->control = (1 << 23) | (1 << 31) |
+			((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+
+	ib_packet->bitfields5.pasid = pasid;
+
+	/*
+	 * For now we use RELEASE_MEM for GPU-CPU synchronization;
+	 * consider WaitRegMem + WriteData as a better alternative.
+	 * We get a GART allocation (gpu/cpu mapping) for the sync
+	 * variable, and wait until:
+	 * (a) we sync with the HW, and
+	 * (b) the sync var is written by the CP to memory.
+	 */
+	rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
+			(sizeof(struct pm4__indirect_buffer_pasid) /
+					sizeof(unsigned int)));
+
+	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
+					&mem_obj);
+
+	if (status != 0) {
+		pr_err("amdkfd: Failed to allocate GART memory\n");
+		kq->ops.rollback_packet(kq);
+		return status;
+	}
+
+	rm_state = (uint64_t *) mem_obj->cpu_ptr;
+
+	*rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
+
+	rm_packet->header.opcode = IT_RELEASE_MEM;
+	rm_packet->header.type = PM4_TYPE_3;
+	rm_packet->header.count = sizeof(struct pm4__release_mem) /
+					sizeof(unsigned int) - 2;
+
+	rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+	rm_packet->bitfields2.event_index =
+				event_index___release_mem__end_of_pipe;
+
+	rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+	rm_packet->bitfields2.atc = 0;
+	rm_packet->bitfields2.tc_wb_action_ena = 1;
+
+	addr.quad_part = mem_obj->gpu_addr;
+
+	rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
+	rm_packet->address_hi = addr.u.high_part;
+
+	rm_packet->bitfields3.data_sel =
+				data_sel___release_mem__send_64_bit_data;
+
+	rm_packet->bitfields3.int_sel =
+			int_sel___release_mem__send_data_after_write_confirm;
+
+	rm_packet->bitfields3.dst_sel =
+			dst_sel___release_mem__memory_controller;
+
+	rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+	kq->ops.submit_packet(kq);
+
+	/* Wait till CP writes sync code: */
+	status = amdkfd_fence_wait_timeout(
+			(unsigned int *) rm_state,
+			QUEUESTATE__ACTIVE, 1500);
+
+	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+	return status;
+}
+
+static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
+{
+	BUG_ON(!dbgdev);
+
+	/*
+	 * no action is needed in this case,
+	 * just make sure diq will not be used
+	 */
+
+	dbgdev->kq = NULL;
+
+	return 0;
+}
+
+static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
+{
+	struct queue_properties properties;
+	unsigned int qid;
+	struct kernel_queue *kq = NULL;
+	int status;
+
+	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
+
+	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
+				&properties, 0, KFD_QUEUE_TYPE_DIQ,
+				&qid);
+
+	if (status) {
+		pr_err("amdkfd: Failed to create DIQ\n");
+		return status;
+	}
+
+	pr_debug("DIQ Created with queue id: %d\n", qid);
+
+	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
+
+	if (kq == NULL) {
+		pr_err("amdkfd: Error getting DIQ\n");
+		pqm_destroy_queue(dbgdev->pqm, qid);
+		return -EFAULT;
+	}
+
+	dbgdev->kq = kq;
+
+	return status;
+}
+
+static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
+{
+	BUG_ON(!dbgdev || !dbgdev->dev);
+
+	/* disable watch address */
+	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
+	return 0;
+}
+
+static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
+{
+	/* todo - disable address watch */
+	int status;
+
+	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
+
+	status = pqm_destroy_queue(dbgdev->pqm,
+			dbgdev->kq->queue->properties.queue_id);
+	dbgdev->kq = NULL;
+
+	return status;
+}
+
+static void dbgdev_address_watch_set_registers(
+			const struct dbg_address_watch_info *adw_info,
+			union TCP_WATCH_ADDR_H_BITS *addrHi,
+			union TCP_WATCH_ADDR_L_BITS *addrLo,
+			union TCP_WATCH_CNTL_BITS *cntl,
+			unsigned int index, unsigned int vmid)
+{
+	union ULARGE_INTEGER addr;
+
+	BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
+
+	addr.quad_part = 0;
+	addrHi->u32All = 0;
+	addrLo->u32All = 0;
+	cntl->u32All = 0;
+
+	if (adw_info->watch_mask != NULL)
+		cntl->bitfields.mask =
+			(uint32_t) (adw_info->watch_mask[index] &
+					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
+	else
+		cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+
+	addr.quad_part = (unsigned long long) adw_info->watch_address[index];
+
+	addrHi->bitfields.addr = addr.u.high_part &
+					ADDRESS_WATCH_REG_ADDHIGH_MASK;
+	addrLo->bitfields.addr =
+			(addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
+
+	cntl->bitfields.mode = adw_info->watch_mode[index];
+	cntl->bitfields.vmid = (uint32_t) vmid;
+	/* for now assume it is an ATC address */
+	cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
+
+	pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
+	pr_debug("\t\t%20s %08x\n", "set reg add high :",
+			addrHi->bitfields.addr);
+	pr_debug("\t\t%20s %08x\n", "set reg add low :",
+			addrLo->bitfields.addr);
+}
+
+static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+					struct dbg_address_watch_info *adw_info)
+{
+	union TCP_WATCH_ADDR_H_BITS addrHi;
+	union TCP_WATCH_ADDR_L_BITS addrLo;
+	union TCP_WATCH_CNTL_BITS cntl;
+	struct kfd_process_device *pdd;
+	unsigned int i;
+
+	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+	/* take the vmid for that process the safe way, using the pdd */
+	pdd = kfd_get_process_device_data(dbgdev->dev,
+					adw_info->process);
+	if (!pdd) {
+		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+		return -EFAULT;
+	}
+
+	addrHi.u32All = 0;
+	addrLo.u32All = 0;
+	cntl.u32All = 0;
+
+	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+			(adw_info->num_watch_points == 0)) {
+		pr_err("amdkfd: num_watch_points is invalid\n");
+		return -EINVAL;
+	}
+
+	if ((adw_info->watch_mode == NULL) ||
+		(adw_info->watch_address == NULL)) {
+		pr_err("amdkfd: adw_info fields are not valid\n");
+		return -EINVAL;
+	}
+
+	for (i = 0 ; i < adw_info->num_watch_points ; i++) {
+		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
+						&cntl, i, pdd->qpd.vmid);
+
+		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+		pr_debug("\t\t%20s %08x\n", "register index :", i);
+		pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
+		pr_debug("\t\t%20s %08x\n", "Address Low is :",
+				addrLo.bitfields.addr);
+		pr_debug("\t\t%20s %08x\n", "Address high is :",
+				addrHi.bitfields.addr);
+		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+				cntl.bitfields.mask);
+		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+				cntl.bitfields.mode);
+		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+				cntl.bitfields.vmid);
+		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
+				cntl.bitfields.atc);
+		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+		pdd->dev->kfd2kgd->address_watch_execute(
+						dbgdev->dev->kgd,
+						i,
+						cntl.u32All,
+						addrHi.u32All,
+						addrLo.u32All);
+	}
+
+	return 0;
+}
+
+static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+					struct dbg_address_watch_info *adw_info)
+{
+	struct pm4__set_config_reg *packets_vec;
+	union TCP_WATCH_ADDR_H_BITS addrHi;
+	union TCP_WATCH_ADDR_L_BITS addrLo;
+	union TCP_WATCH_CNTL_BITS cntl;
+	struct kfd_mem_obj *mem_obj;
+	unsigned int aw_reg_add_dword;
+	uint32_t *packet_buff_uint;
+	unsigned int i;
+	int status;
+	size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
+	/* we do not control the vmid in DIQ mode, just a placeholder */
+	unsigned int vmid = 0;
+
+	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+	addrHi.u32All = 0;
+	addrLo.u32All = 0;
+	cntl.u32All = 0;
+
+	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+			(adw_info->num_watch_points == 0)) {
+		pr_err("amdkfd: num_watch_points is invalid\n");
+		return -EINVAL;
+	}
+
+	if ((NULL == adw_info->watch_mode) ||
+			(NULL == adw_info->watch_address)) {
+		pr_err("amdkfd: adw_info fields are not valid\n");
+		return -EINVAL;
+	}
+
+	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+	if (status != 0) {
+		pr_err("amdkfd: Failed to allocate GART memory\n");
+		return status;
+	}
+
+	packet_buff_uint = mem_obj->cpu_ptr;
+
+	memset(packet_buff_uint, 0, ib_size);
+
+	packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+
+	packets_vec[0].header.count = 1;
+	packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
+	packets_vec[0].header.type = PM4_TYPE_3;
+	packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+	packets_vec[0].bitfields2.insert_vmid = 1;
+	packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
+	packets_vec[1].bitfields2.insert_vmid = 0;
+	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+	packets_vec[2].bitfields2.insert_vmid = 0;
+	packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
+	packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+	packets_vec[3].bitfields2.insert_vmid = 1;
+
+	for (i = 0; i < adw_info->num_watch_points; i++) {
+		dbgdev_address_watch_set_registers(adw_info,
+						&addrHi,
+						&addrLo,
+						&cntl,
+						i,
+						vmid);
+
+		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+		pr_debug("\t\t%20s %08x\n", "register index :", i);
+		pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+		pr_debug("\t\t%20s %p\n", "Add ptr is :",
+				adw_info->watch_address);
+		pr_debug("\t\t%20s %08llx\n", "Add     is :",
+				adw_info->watch_address[i]);
+		pr_debug("\t\t%20s %08x\n", "Address Low is :",
+				addrLo.bitfields.addr);
+		pr_debug("\t\t%20s %08x\n", "Address high is :",
+				addrHi.bitfields.addr);
+		pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+				cntl.bitfields.mask);
+		pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+				cntl.bitfields.mode);
+		pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+				cntl.bitfields.vmid);
+		pr_debug("\t\t%20s %08x\n", "Control atc  is :",
+				cntl.bitfields.atc);
+		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+		aw_reg_add_dword =
+				dbgdev->dev->kfd2kgd->address_watch_get_offset(
+					dbgdev->dev->kgd,
+					i,
+					ADDRESS_WATCH_REG_CNTL);
+
+		aw_reg_add_dword /= sizeof(uint32_t);
+
+		packets_vec[0].bitfields2.reg_offset =
+					aw_reg_add_dword - CONFIG_REG_BASE;
+
+		packets_vec[0].reg_data[0] = cntl.u32All;
+
+		aw_reg_add_dword =
+				dbgdev->dev->kfd2kgd->address_watch_get_offset(
+					dbgdev->dev->kgd,
+					i,
+					ADDRESS_WATCH_REG_ADDR_HI);
+
+		aw_reg_add_dword /= sizeof(uint32_t);
+
+		packets_vec[1].bitfields2.reg_offset =
+					aw_reg_add_dword - CONFIG_REG_BASE;
+		packets_vec[1].reg_data[0] = addrHi.u32All;
+
+		aw_reg_add_dword =
+				dbgdev->dev->kfd2kgd->address_watch_get_offset(
+					dbgdev->dev->kgd,
+					i,
+					ADDRESS_WATCH_REG_ADDR_LO);
+
+		aw_reg_add_dword /= sizeof(uint32_t);
+
+		packets_vec[2].bitfields2.reg_offset =
+				aw_reg_add_dword - CONFIG_REG_BASE;
+		packets_vec[2].reg_data[0] = addrLo.u32All;
+
+		/* enable watch flag if address is not zero */
+		if (adw_info->watch_address[i] > 0)
+			cntl.bitfields.valid = 1;
+		else
+			cntl.bitfields.valid = 0;
+
+		aw_reg_add_dword =
+				dbgdev->dev->kfd2kgd->address_watch_get_offset(
+					dbgdev->dev->kgd,
+					i,
+					ADDRESS_WATCH_REG_CNTL);
+
+		aw_reg_add_dword /= sizeof(uint32_t);
+
+		packets_vec[3].bitfields2.reg_offset =
+					aw_reg_add_dword - CONFIG_REG_BASE;
+		packets_vec[3].reg_data[0] = cntl.u32All;
+
+		status = dbgdev_diq_submit_ib(
+					dbgdev,
+					adw_info->process->pasid,
+					mem_obj->gpu_addr,
+					packet_buff_uint,
+					ib_size);
+
+		if (status != 0) {
+			pr_err("amdkfd: Failed to submit IB to DIQ\n");
+			break;
+		}
+	}
+
+	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+	return status;
+}
+
+static int dbgdev_wave_control_set_registers(
+				struct dbg_wave_control_info *wac_info,
+				union SQ_CMD_BITS *in_reg_sq_cmd,
+				union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
+{
+	int status = 0;
+	union SQ_CMD_BITS reg_sq_cmd;
+	union GRBM_GFX_INDEX_BITS reg_gfx_index;
+	struct HsaDbgWaveMsgAMDGen2 *pMsg;
+
+	BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
+
+	reg_sq_cmd.u32All = 0;
+	reg_gfx_index.u32All = 0;
+	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
+
+	switch (wac_info->mode) {
+	/* Send command to single wave */
+	case HSA_DBG_WAVEMODE_SINGLE:
+		/*
+		 * Limit access to the process waves only,
+		 * by setting vmid check
+		 */
+		reg_sq_cmd.bits.check_vmid = 1;
+		reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
+		reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
+		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
+
+		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+		break;
+
+	/* Send command to all waves with matching VMID */
+	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
+
+		reg_gfx_index.bits.sh_broadcast_writes = 1;
+		reg_gfx_index.bits.se_broadcast_writes = 1;
+		reg_gfx_index.bits.instance_broadcast_writes = 1;
+
+		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+		break;
+
+	/* Send command to all CU waves with matching VMID */
+	case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
+
+		reg_sq_cmd.bits.check_vmid = 1;
+		reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+		reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+		reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+		reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	switch (wac_info->operand) {
+	case HSA_DBG_WAVEOP_HALT:
+		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
+		break;
+
+	case HSA_DBG_WAVEOP_RESUME:
+		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
+		break;
+
+	case HSA_DBG_WAVEOP_KILL:
+		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+		break;
+
+	case HSA_DBG_WAVEOP_DEBUG:
+		reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
+		break;
+
+	case HSA_DBG_WAVEOP_TRAP:
+		if (wac_info->trapId < MAX_TRAPID) {
+			reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
+			reg_sq_cmd.bits.trap_id = wac_info->trapId;
+		} else {
+			status = -EINVAL;
+		}
+		break;
+
+	default:
+		status = -EINVAL;
+		break;
+	}
+
+	if (status == 0) {
+		*in_reg_sq_cmd = reg_sq_cmd;
+		*in_reg_gfx_index = reg_gfx_index;
+	}
+
+	return status;
+}
+
+static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+					struct dbg_wave_control_info *wac_info)
+{
+
+	int status;
+	union SQ_CMD_BITS reg_sq_cmd;
+	union GRBM_GFX_INDEX_BITS reg_gfx_index;
+	struct kfd_mem_obj *mem_obj;
+	uint32_t *packet_buff_uint;
+	struct pm4__set_config_reg *packets_vec;
+	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+	BUG_ON(!dbgdev || !wac_info);
+
+	reg_sq_cmd.u32All = 0;
+
+	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+							&reg_gfx_index);
+	if (status) {
+		pr_err("amdkfd: Failed to set wave control registers\n");
+		return status;
+	}
+
+	/* we do not control the VMID in DIQ, so reset it to a known value */
+	reg_sq_cmd.bits.vm_id = 0;
+
+	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
+	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
+	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
+	pr_debug("\t\t msg value is: %u\n",
+			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+	pr_debug("\t\t vmid      is: N/A\n");
+
+	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
+	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
+	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
+	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
+	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
+	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+	pr_debug("\t\t ibw       is : %u\n",
+			reg_gfx_index.bitfields.instance_broadcast_writes);
+	pr_debug("\t\t ii        is : %u\n",
+			reg_gfx_index.bitfields.instance_index);
+	pr_debug("\t\t sebw      is : %u\n",
+			reg_gfx_index.bitfields.se_broadcast_writes);
+	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
+	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
+	pr_debug("\t\t sbw       is : %u\n",
+			reg_gfx_index.bitfields.sh_broadcast_writes);
+
+	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+	if (status != 0) {
+		pr_err("amdkfd: Failed to allocate GART memory\n");
+		return status;
+	}
+
+	packet_buff_uint = mem_obj->cpu_ptr;
+
+	memset(packet_buff_uint, 0, ib_size);
+
+	packets_vec =  (struct pm4__set_config_reg *) packet_buff_uint;
+	packets_vec[0].header.count = 1;
+	packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
+	packets_vec[0].header.type = PM4_TYPE_3;
+	packets_vec[0].bitfields2.reg_offset =
+			GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+				USERCONFIG_REG_BASE;
+
+	packets_vec[0].bitfields2.insert_vmid = 0;
+	packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
+
+	packets_vec[1].header.count = 1;
+	packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
+	packets_vec[1].header.type = PM4_TYPE_3;
+	packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
+						CONFIG_REG_BASE;
+
+	packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
+	packets_vec[1].bitfields2.insert_vmid = 1;
+	packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
+
+	/* Restore the GRBM_GFX_INDEX register */
+
+	reg_gfx_index.u32All = 0;
+	reg_gfx_index.bits.sh_broadcast_writes = 1;
+	reg_gfx_index.bits.instance_broadcast_writes = 1;
+	reg_gfx_index.bits.se_broadcast_writes = 1;
+
+
+	packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+	packets_vec[2].bitfields2.reg_offset =
+				GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+					USERCONFIG_REG_BASE;
+
+	packets_vec[2].bitfields2.insert_vmid = 0;
+	packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+	status = dbgdev_diq_submit_ib(
+			dbgdev,
+			wac_info->process->pasid,
+			mem_obj->gpu_addr,
+			packet_buff_uint,
+			ib_size);
+
+	if (status != 0)
+		pr_err("amdkfd: Failed to submit IB to DIQ\n");
+
+	kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+	return status;
+}
+
+static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
+					struct dbg_wave_control_info *wac_info)
+{
+	int status;
+	union SQ_CMD_BITS reg_sq_cmd;
+	union GRBM_GFX_INDEX_BITS reg_gfx_index;
+	struct kfd_process_device *pdd;
+
+	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
+
+	reg_sq_cmd.u32All = 0;
+
+	/* take the VMID for that process the safe way, using the PDD */
+	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
+
+	if (!pdd) {
+		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+		return -EFAULT;
+	}
+	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+							&reg_gfx_index);
+	if (status) {
+		pr_err("amdkfd: Failed to set wave control registers\n");
+		return status;
+	}
+
+	/* for non-DIQ we need to patch the VMID: */
+
+	reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
+
+	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+	pr_debug("\t\t mode      is: %u\n", wac_info->mode);
+	pr_debug("\t\t operand   is: %u\n", wac_info->operand);
+	pr_debug("\t\t trap id   is: %u\n", wac_info->trapId);
+	pr_debug("\t\t msg value is: %u\n",
+			wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+	pr_debug("\t\t vmid      is: %u\n", pdd->qpd.vmid);
+
+	pr_debug("\t\t chk_vmid  is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+	pr_debug("\t\t command   is : %u\n", reg_sq_cmd.bitfields.cmd);
+	pr_debug("\t\t queue id  is : %u\n", reg_sq_cmd.bitfields.queue_id);
+	pr_debug("\t\t simd id   is : %u\n", reg_sq_cmd.bitfields.simd_id);
+	pr_debug("\t\t mode      is : %u\n", reg_sq_cmd.bitfields.mode);
+	pr_debug("\t\t vm_id     is : %u\n", reg_sq_cmd.bitfields.vm_id);
+	pr_debug("\t\t wave_id   is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+	pr_debug("\t\t ibw       is : %u\n",
+			reg_gfx_index.bitfields.instance_broadcast_writes);
+	pr_debug("\t\t ii        is : %u\n",
+			reg_gfx_index.bitfields.instance_index);
+	pr_debug("\t\t sebw      is : %u\n",
+			reg_gfx_index.bitfields.se_broadcast_writes);
+	pr_debug("\t\t se_ind    is : %u\n", reg_gfx_index.bitfields.se_index);
+	pr_debug("\t\t sh_ind    is : %u\n", reg_gfx_index.bitfields.sh_index);
+	pr_debug("\t\t sbw       is : %u\n",
+			reg_gfx_index.bitfields.sh_broadcast_writes);
+
+	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
+							reg_gfx_index.u32All,
+							reg_sq_cmd.u32All);
+}
+
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+	int status = 0;
+	unsigned int vmid;
+	union SQ_CMD_BITS reg_sq_cmd;
+	union GRBM_GFX_INDEX_BITS reg_gfx_index;
+	struct kfd_process_device *pdd;
+	struct dbg_wave_control_info wac_info;
+	int temp;
+	int first_vmid_to_scan;
+	int last_vmid_to_scan;
+
+	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
+	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
+	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
+
+	reg_sq_cmd.u32All = 0;
+	status = 0;
+
+	wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
+	wac_info.operand = HSA_DBG_WAVEOP_KILL;
+
+	pr_debug("Killing all process wavefronts\n");
+
+	/*
+	 * Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+	 * ATC_VMID15_PASID_MAPPING to check which VMID the current
+	 * process is mapped to.
+	 */
+
+	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+				(dev->kgd, vmid)) {
+			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
+					(dev->kgd, vmid) == p->pasid) {
+				pr_debug("Killing wavefronts of vmid %d and pasid %d\n",
+						vmid, p->pasid);
+				break;
+			}
+		}
+	}
+
+	if (vmid > last_vmid_to_scan) {
+		pr_err("amdkfd: didn't find vmid for pasid (%d)\n", p->pasid);
+		return -EFAULT;
+	}
+
+	/* take the VMID for that process the safe way, using the PDD */
+	pdd = kfd_get_process_device_data(dev, p);
+	if (!pdd)
+		return -EFAULT;
+
+	status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
+			&reg_gfx_index);
+	if (status != 0)
+		return -EINVAL;
+
+	/* for non-DIQ we need to patch the VMID: */
+	reg_sq_cmd.bits.vm_id = vmid;
+
+	dev->kfd2kgd->wave_control_execute(dev->kgd,
+					reg_gfx_index.u32All,
+					reg_sq_cmd.u32All);
+
+	return 0;
+}
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+			enum DBGDEV_TYPE type)
+{
+	BUG_ON(!pdbgdev || !pdev);
+
+	pdbgdev->dev = pdev;
+	pdbgdev->kq = NULL;
+	pdbgdev->type = type;
+	pdbgdev->pqm = NULL;
+
+	switch (type) {
+	case DBGDEV_TYPE_NODIQ:
+		pdbgdev->dbgdev_register = dbgdev_register_nodiq;
+		pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
+		pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
+		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
+		break;
+	case DBGDEV_TYPE_DIQ:
+	default:
+		pdbgdev->dbgdev_register = dbgdev_register_diq;
+		pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
+		pdbgdev->dbgdev_wave_control =  dbgdev_wave_control_diq;
+		pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
+		break;
+	}
+
+}
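
The wave-control paths above all funnel through dbgdev_wave_control_set_registers(), which packs the requested operation into the SQ_CMD and GRBM_GFX_INDEX register images via the bitfield unions declared in kfd_dbgdev.h below, and only then hands the raw 32-bit values to the execute hook. A compact demo of that pack-then-raw-write idiom — the layout is copied from the SQ_CMD_BITS union below, and as in the kernel header itself, C bitfield ordering is implementation-defined and tolerated only because known ABIs are targeted:

/* Demo of the bitfield-union register packing used above; layout
 * copied from the SQ_CMD_BITS union below. */
#include <stdint.h>
#include <stdio.h>

union sq_cmd_bits {
	struct {
		uint32_t cmd:3;
		uint32_t:1;
		uint32_t mode:3;
		uint32_t check_vmid:1;
		uint32_t trap_id:3;
		uint32_t:5;
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		uint32_t:2;
		uint32_t queue_id:3;
		uint32_t:1;
		uint32_t vm_id:4;
	} bits;
	uint32_t u32All;
};

int main(void)
{
	union sq_cmd_bits reg = { .u32All = 0 };

	reg.bits.cmd = 3;	/* SQ_IND_CMD_CMD_KILL */
	reg.bits.mode = 1;	/* SQ_IND_CMD_MODE_BROADCAST */
	reg.bits.check_vmid = 1;
	reg.bits.vm_id = 8;	/* patched in for the no-DIQ path */

	/* the raw u32 is what wave_control_execute() finally writes */
	printf("SQ_CMD image: 0x%08x\n", reg.u32All);
	return 0;
}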

+ 193 - 0
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h

@@ -0,0 +1,193 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DBGDEV_H_
+#define KFD_DBGDEV_H_
+
+enum {
+	SQ_CMD_VMID_OFFSET = 28,
+	ADDRESS_WATCH_CNTL_OFFSET = 24
+};
+
+enum {
+	PRIV_QUEUE_SYNC_TIME_MS = 200
+};
+
+/* CONTEXT reg space definition */
+enum {
+	CONTEXT_REG_BASE = 0xA000,
+	CONTEXT_REG_END = 0xA400,
+	CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
+};
+
+/* USER CONFIG reg space definition */
+enum {
+	USERCONFIG_REG_BASE = 0xC000,
+	USERCONFIG_REG_END = 0x10000,
+	USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
+};
+
+/* CONFIG reg space definition */
+enum {
+	CONFIG_REG_BASE = 0x2000,	/* in dwords */
+	CONFIG_REG_END = 0x2B00,
+	CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
+};
+
+/* SH reg space definition */
+enum {
+	SH_REG_BASE = 0x2C00,
+	SH_REG_END = 0x3000,
+	SH_REG_SIZE = SH_REG_END - SH_REG_BASE
+};
+
+enum SQ_IND_CMD_CMD {
+	SQ_IND_CMD_CMD_NULL = 0x00000000,
+	SQ_IND_CMD_CMD_HALT = 0x00000001,
+	SQ_IND_CMD_CMD_RESUME = 0x00000002,
+	SQ_IND_CMD_CMD_KILL = 0x00000003,
+	SQ_IND_CMD_CMD_DEBUG = 0x00000004,
+	SQ_IND_CMD_CMD_TRAP = 0x00000005,
+};
+
+enum SQ_IND_CMD_MODE {
+	SQ_IND_CMD_MODE_SINGLE = 0x00000000,
+	SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
+	SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
+	SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
+	SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
+};
+
+union SQ_IND_INDEX_BITS {
+	struct {
+		uint32_t wave_id:4;
+		uint32_t simd_id:2;
+		uint32_t thread_id:6;
+		 uint32_t:1;
+		uint32_t force_read:1;
+		uint32_t read_timeout:1;
+		uint32_t unindexed:1;
+		uint32_t index:16;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union SQ_IND_CMD_BITS {
+	struct {
+		uint32_t data:32;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union SQ_CMD_BITS {
+	struct {
+		uint32_t cmd:3;
+		 uint32_t:1;
+		uint32_t mode:3;
+		uint32_t check_vmid:1;
+		uint32_t trap_id:3;
+		 uint32_t:5;
+		uint32_t wave_id:4;
+		uint32_t simd_id:2;
+		 uint32_t:2;
+		uint32_t queue_id:3;
+		 uint32_t:1;
+		uint32_t vm_id:4;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union SQ_IND_DATA_BITS {
+	struct {
+		uint32_t data:32;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+	struct {
+		uint32_t instance_index:8;
+		uint32_t sh_index:8;
+		uint32_t se_index:8;
+		 uint32_t:5;
+		uint32_t sh_broadcast_writes:1;
+		uint32_t instance_broadcast_writes:1;
+		uint32_t se_broadcast_writes:1;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union TCP_WATCH_ADDR_H_BITS {
+	struct {
+		uint32_t addr:16;
+		 uint32_t:16;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+union TCP_WATCH_ADDR_L_BITS {
+	struct {
+		uint32_t:6;
+		uint32_t addr:26;
+	} bitfields, bits;
+	uint32_t u32All;
+	signed int i32All;
+	float f32All;
+};
+
+enum {
+	QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
+	QUEUESTATE__ACTIVE_COMPLETION_PENDING,
+	QUEUESTATE__ACTIVE
+};
+
+union ULARGE_INTEGER {
+	struct {
+		uint32_t low_part;
+		uint32_t high_part;
+	} u;
+	unsigned long long quad_part;
+};
+
+
+#define KFD_CIK_VMID_START_OFFSET (8)
+#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
+
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+			enum DBGDEV_TYPE type);
+
+#endif	/* KFD_DBGDEV_H_ */
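The unions in this header let the driver compose a command register field by field and then hand the raw dword (u32All) to wave_control_execute(). A standalone sketch of that pattern for SQ_CMD_BITS follows; the field values are illustrative only, and C bitfield packing is compiler/ABI-dependent, which the driver can rely on only because it targets a fixed kernel ABI.

#include <stdint.h>
#include <stdio.h>

union SQ_CMD_BITS {
	struct {
		uint32_t cmd:3;
		 uint32_t:1;
		uint32_t mode:3;
		uint32_t check_vmid:1;
		uint32_t trap_id:3;
		 uint32_t:5;
		uint32_t wave_id:4;
		uint32_t simd_id:2;
		 uint32_t:2;
		uint32_t queue_id:3;
		 uint32_t:1;
		uint32_t vm_id:4;
	} bits;
	uint32_t u32All;
};

int main(void)
{
	union SQ_CMD_BITS reg_sq_cmd = { .u32All = 0 };

	reg_sq_cmd.bits.cmd = 3;	/* SQ_IND_CMD_CMD_KILL */
	reg_sq_cmd.bits.mode = 1;	/* SQ_IND_CMD_MODE_BROADCAST */
	reg_sq_cmd.bits.check_vmid = 1;	/* restrict the command to one VMID */
	reg_sq_cmd.bits.vm_id = 8;	/* assumption: first compute VMID on CIK */

	printf("SQ_CMD = 0x%08x\n", (unsigned int)reg_sq_cmd.u32All);
	return 0;
}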

+ 168 - 0
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c

@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include "kfd_priv.h"
+#include "cik_regs.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+
+static DEFINE_MUTEX(kfd_dbgmgr_mutex);
+
+struct mutex *kfd_get_dbgmgr_mutex(void)
+{
+	return &kfd_dbgmgr_mutex;
+}
+
+
+static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+{
+	BUG_ON(!pmgr);
+
+	kfree(pmgr->dbgdev);
+
+	pmgr->dbgdev = NULL;
+	pmgr->pasid = 0;
+	pmgr->dev = NULL;
+}
+
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+{
+	if (pmgr != NULL) {
+		kfd_dbgmgr_uninitialize(pmgr);
+		kfree(pmgr);
+	}
+}
+
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+{
+	enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
+	struct kfd_dbgmgr *new_buff;
+
+	BUG_ON(pdev == NULL);
+	BUG_ON(!pdev->init_complete);
+
+	new_buff = kfd_alloc_struct(new_buff);
+	if (!new_buff) {
+		pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+		return false;
+	}
+
+	new_buff->pasid = 0;
+	new_buff->dev = pdev;
+	new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
+	if (!new_buff->dbgdev) {
+		pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+		kfree(new_buff);
+		return false;
+	}
+
+	/*
+	 * pick the actual debug device type, depending on whether
+	 * HW scheduling (cpsch) is in use
+	 */
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		type = DBGDEV_TYPE_NODIQ;
+
+	kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
+	*ppmgr = new_buff;
+
+	return true;
+}
+
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+	BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+	if (pmgr->pasid != 0) {
+		pr_debug("H/W debugger is already active using pasid %d\n",
+				pmgr->pasid);
+		return -EBUSY;
+	}
+
+	/* remember pasid */
+	pmgr->pasid = p->pasid;
+
+	/* provide the pqm for diq generation */
+	pmgr->dbgdev->pqm = &p->pqm;
+
+	/* activate the actual registering */
+	pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
+
+	return 0;
+}
+
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+	BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+	/* Is the request coming from the already registered process? */
+	if (pmgr->pasid != p->pasid) {
+		pr_debug("H/W debugger is not registered by calling pasid %d\n",
+				p->pasid);
+		return -EINVAL;
+	}
+
+	pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
+
+	pmgr->pasid = 0;
+
+	return 0;
+}
+
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+				struct dbg_wave_control_info *wac_info)
+{
+	BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
+
+	/* Is the request coming from the already registered process? */
+	if (pmgr->pasid != wac_info->process->pasid) {
+		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+				wac_info->process->pasid);
+		return -EINVAL;
+	}
+
+	return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
+}
+
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+				struct dbg_address_watch_info *adw_info)
+{
+	BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
+
+	/* Is the request coming from the already registered process? */
+	if (pmgr->pasid != adw_info->process->pasid) {
+		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+				adw_info->process->pasid);
+		return -EINVAL;
+	}
+
+	return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
+							adw_info);
+}
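Every entry point in this file takes the single global kfd_dbgmgr_mutex and keys ownership off the pasid. Below is a userspace sketch of that single-owner protocol using pthreads; the names, the standalone struct, and the pasid values are illustrative, not the kernel API.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dbgmgr_mutex = PTHREAD_MUTEX_INITIALIZER;

struct dbgmgr { unsigned int pasid; };	/* 0 means "no debugger attached" */

static long dbgmgr_register(struct dbgmgr *m, unsigned int pasid)
{
	long ret = 0;

	pthread_mutex_lock(&dbgmgr_mutex);
	if (m->pasid != 0)
		ret = -EBUSY;		/* another process already registered */
	else
		m->pasid = pasid;	/* remember the owner */
	pthread_mutex_unlock(&dbgmgr_mutex);
	return ret;
}

static long dbgmgr_unregister(struct dbgmgr *m, unsigned int pasid)
{
	long ret = 0;

	pthread_mutex_lock(&dbgmgr_mutex);
	if (m->pasid != pasid)
		ret = -EINVAL;		/* only the registering process may detach */
	else
		m->pasid = 0;
	pthread_mutex_unlock(&dbgmgr_mutex);
	return ret;
}

int main(void)
{
	struct dbgmgr m = { 0 };

	printf("register(42)   -> %ld\n", dbgmgr_register(&m, 42));	/* 0 */
	printf("register(43)   -> %ld\n", dbgmgr_register(&m, 43));	/* -EBUSY */
	printf("unregister(43) -> %ld\n", dbgmgr_unregister(&m, 43));	/* -EINVAL */
	printf("unregister(42) -> %ld\n", dbgmgr_unregister(&m, 42));	/* 0 */
	return 0;
}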

+ 294 - 0
drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h

@@ -0,0 +1,294 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DBGMGR_H_
+#define KFD_DBGMGR_H_
+
+#include "kfd_priv.h"
+
+/* must align with hsakmttypes definition */
+#pragma pack(push, 4)
+
+enum HSA_DBG_WAVEOP {
+	HSA_DBG_WAVEOP_HALT = 1,	/* Halts a wavefront		*/
+	HSA_DBG_WAVEOP_RESUME = 2,	/* Resumes a wavefront		*/
+	HSA_DBG_WAVEOP_KILL = 3,	/* Kills a wavefront		*/
+	HSA_DBG_WAVEOP_DEBUG = 4,	/* Causes wavefront to enter
+						debug mode		*/
+	HSA_DBG_WAVEOP_TRAP = 5,	/* Causes wavefront to take
+						a trap			*/
+	HSA_DBG_NUM_WAVEOP = 5,
+	HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMODE {
+	/* send command to a single wave */
+	HSA_DBG_WAVEMODE_SINGLE = 0,
+	/*
+	 * Broadcast to all wavefronts of all processes is not
+	 * supported for HSA user mode
+	 */
+
+	/* send to waves within current process */
+	HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
+	/* send to waves within current process on CU  */
+	HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
+	HSA_DBG_NUM_WAVEMODE = 3,
+	HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMSG_TYPE {
+	HSA_DBG_WAVEMSG_AUTO = 0,
+	HSA_DBG_WAVEMSG_USER = 1,
+	HSA_DBG_WAVEMSG_ERROR = 2,
+	HSA_DBG_NUM_WAVEMSG,
+	HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WATCH_MODE {
+	HSA_DBG_WATCH_READ = 0,		/* Read operations only */
+	HSA_DBG_WATCH_NONREAD = 1,	/* Write or Atomic operations only */
+	HSA_DBG_WATCH_ATOMIC = 2,	/* Atomic Operations only */
+	HSA_DBG_WATCH_ALL = 3,		/* Read, Write or Atomic operations */
+	HSA_DBG_WATCH_NUM,
+	HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+};
+
+/* This structure is hardware specific and may change in the future */
+struct HsaDbgWaveMsgAMDGen2 {
+	union {
+		struct ui32 {
+			uint32_t UserData:8;	/* user data */
+			uint32_t ShaderArray:1;	/* Shader array */
+			uint32_t Priv:1;	/* Privileged */
+			uint32_t Reserved0:4;	/* This field is reserved,
+						   should be 0 */
+			uint32_t WaveId:4;	/* wave id */
+			uint32_t SIMD:2;	/* SIMD id */
+			uint32_t HSACU:4;	/* Compute unit */
+			uint32_t ShaderEngine:2;/* Shader engine */
+			uint32_t MessageType:2;	/* see HSA_DBG_WAVEMSG_TYPE */
+			uint32_t Reserved1:4;	/* This field is reserved,
+						   should be 0 */
+		} ui32;
+		uint32_t Value;
+	};
+	uint32_t Reserved2;
+};
+
+union HsaDbgWaveMessageAMD {
+	struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+	/* room for a future HsaDbgWaveMsgAMDGen3 */
+};
+
+struct HsaDbgWaveMessage {
+	void *MemoryVA;		/* ptr to associated host-accessible data */
+	union HsaDbgWaveMessageAMD DbgWaveMsg;
+};
+
+/*
+ * TODO: These definitions are to be moved to kfd_event, once it is implemented.
+ *
+ * HSA sync primitive, Event and HW Exception notification API definitions.
+ * The API functions allow the runtime to define a so-called sync-primitive,
+ * a SW object combining a user-mode provided "syncvar" and a scheduler event
+ * that can be signaled through a defined GPU interrupt. A syncvar is
+ * a process virtual memory location of a certain size that can be accessed
+ * by CPU and GPU shader code within the process to set and query the content
+ * within that memory. The definition of the content is determined by the HSA
+ * runtime and potentially GPU shader code interfacing with the HSA runtime.
+ * The syncvar values may be commonly written through a PM4 WRITE_DATA packet
+ * in the user mode instruction stream. The OS scheduler event is typically
+ * associated and signaled by an interrupt issued by the GPU, but other HSA
+ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
+ * by the KFD by this mechanism, too.
+ */
+
+/* these are the new definitions for events */
+enum HSA_EVENTTYPE {
+	HSA_EVENTTYPE_SIGNAL = 0,	/* user-mode generated GPU signal */
+	HSA_EVENTTYPE_NODECHANGE = 1,	/* HSA node change (attach/detach) */
+	HSA_EVENTTYPE_DEVICESTATECHANGE = 2,	/* HSA device state change
+						   (start/stop) */
+	HSA_EVENTTYPE_HW_EXCEPTION = 3,	/* GPU shader exception event */
+	HSA_EVENTTYPE_SYSTEM_EVENT = 4,	/* GPU SYSCALL with parameter info */
+	HSA_EVENTTYPE_DEBUG_EVENT = 5,	/* GPU signal for debugging */
+	HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
+	HSA_EVENTTYPE_QUEUE_EVENT = 7,	/* GPU signal queue idle state
+					   (EOP pm4) */
+	/* ...  */
+	HSA_EVENTTYPE_MAXID,
+	HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+};
+
+/* Sub-definitions for various event types: Syncvar */
+struct HsaSyncVar {
+	union SyncVar {
+		void *UserData;	/* pointer to user mode data */
+		uint64_t UserDataPtrValue; /* 64bit compatibility of value */
+	} SyncVar;
+	uint64_t SyncVarSize;
+};
+
+/* Sub-definitions for various event types: NodeChange */
+
+enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
+	HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+	HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+	HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+};
+
+struct HsaNodeChange {
+	/* HSA node added/removed on the platform */
+	enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
+};
+
+/* Sub-definitions for various event types: DeviceStateChange */
+enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
+	/* device started (and available) */
+	HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
+	/* device stopped (i.e. unavailable) */
+	HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
+	HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+};
+
+enum HSA_DEVICE {
+	HSA_DEVICE_CPU = 0,
+	HSA_DEVICE_GPU = 1,
+	MAX_HSA_DEVICE = 2
+};
+
+struct HsaDeviceStateChange {
+	uint32_t NodeId;	/* F-NUMA node that contains the device */
+	enum HSA_DEVICE Device;	/* device type: GPU or CPU */
+	enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
+};
+
+struct HsaEventData {
+	enum HSA_EVENTTYPE EventType; /* event type */
+	union EventData {
+		/*
+		 * return data associated with HSA_EVENTTYPE_SIGNAL
+		 * and other events
+		 */
+		struct HsaSyncVar SyncVar;
+
+		/* data associated with HSA_EVENTTYPE_NODE_CHANGE */
+		struct HsaNodeChange NodeChangeState;
+
+		/* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
+		struct HsaDeviceStateChange DeviceState;
+	} EventData;
+
+	/* the following data entries are internal to the KFD & thunk itself */
+
+	/* internal thunk store for Event data (OsEventHandle) */
+	uint64_t HWData1;
+	/* internal thunk store for Event data (HWAddress) */
+	uint64_t HWData2;
+	/* internal thunk store for Event data (HWData) */
+	uint32_t HWData3;
+};
+
+struct HsaEventDescriptor {
+	/* event type to allocate */
+	enum HSA_EVENTTYPE EventType;
+	/* H-NUMA node containing GPU device that is event source */
+	uint32_t NodeId;
+	/* pointer to user mode syncvar data, syncvar->UserDataPtrValue
+	 * may be NULL
+	 */
+	struct HsaSyncVar SyncVar;
+};
+
+struct HsaEvent {
+	uint32_t EventId;
+	struct HsaEventData EventData;
+};
+
+#pragma pack(pop)
+
+enum DBGDEV_TYPE {
+	DBGDEV_TYPE_ILLEGAL = 0,
+	DBGDEV_TYPE_NODIQ = 1,
+	DBGDEV_TYPE_DIQ = 2,
+	DBGDEV_TYPE_TEST = 3
+};
+
+struct dbg_address_watch_info {
+	struct kfd_process *process;
+	enum HSA_DBG_WATCH_MODE *watch_mode;
+	uint64_t *watch_address;
+	uint64_t *watch_mask;
+	struct HsaEvent *watch_event;
+	uint32_t num_watch_points;
+};
+
+struct dbg_wave_control_info {
+	struct kfd_process *process;
+	uint32_t trapId;
+	enum HSA_DBG_WAVEOP operand;
+	enum HSA_DBG_WAVEMODE mode;
+	struct HsaDbgWaveMessage dbgWave_msg;
+};
+
+struct kfd_dbgdev {
+
+	/* The device that owns this data. */
+	struct kfd_dev *dev;
+
+	/* kernel queue for DIQ */
+	struct kernel_queue *kq;
+
+	/* a pointer to the pqm of the calling process */
+	struct process_queue_manager *pqm;
+
+	/* type of debug device ( DIQ, non DIQ, etc. ) */
+	enum DBGDEV_TYPE type;
+
+	/* virtualized function pointers to device dbg */
+	int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
+	int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
+	int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
+				struct dbg_address_watch_info *adw_info);
+	int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
+				struct dbg_wave_control_info *wac_info);
+
+};
+
+struct kfd_dbgmgr {
+	unsigned int pasid;
+	struct kfd_dev *dev;
+	struct kfd_dbgdev *dbgdev;
+};
+
+/* prototypes for debug manager functions */
+struct mutex *kfd_get_dbgmgr_mutex(void);
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+				struct dbg_wave_control_info *wac_info);
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+			struct dbg_address_watch_info *adw_info);
+#endif /* KFD_DBGMGR_H_ */
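Since the pack(4) region must stay binary-identical to the user-mode hsakmt definitions, a compile-time size check is a cheap guard against ABI drift. The sketch below (C11) derives the expected 8-byte size from the bitfield layout in this header; that expectation is an assumption, not a value taken from the hsakmt headers.

#include <stdint.h>

#pragma pack(push, 4)
struct HsaDbgWaveMsgAMDGen2 {
	union {
		struct {
			uint32_t UserData:8;
			uint32_t ShaderArray:1;
			uint32_t Priv:1;
			uint32_t Reserved0:4;
			uint32_t WaveId:4;
			uint32_t SIMD:2;
			uint32_t HSACU:4;
			uint32_t ShaderEngine:2;
			uint32_t MessageType:2;
			uint32_t Reserved1:4;
		} ui32;
		uint32_t Value;
	};
	uint32_t Reserved2;
};
#pragma pack(pop)

/* 32 bits of flags + 32 reserved bits = 8 bytes under 4-byte packing
 * (the expected size is inferred from the layout above) */
_Static_assert(sizeof(struct HsaDbgWaveMsgAMDGen2) == 8,
	       "ABI drift against the user-mode hsakmt definition");

int main(void) { return 0; }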

+ 5 - 0
drivers/gpu/drm/amd/amdkfd/kfd_device.c

@@ -33,8 +33,11 @@
 static const struct kfd_device_info kaveri_device_info = {
 	.asic_family = CHIP_KAVERI,
 	.max_pasid_bits = 16,
+	/* max number of queues for KV. TODO: should be a dynamic value */
+	.max_no_of_hqd	= 24,
 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
 	.event_interrupt_class = &event_interrupt_class_cik,
+	.num_of_watch_points = 4,
 	.mqd_size_aligned = MQD_SIZE_ALIGNED
 };
 
@@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto dqm_start_error;
 	}
 
+	kfd->dbgmgr = NULL;
+
 	kfd->init_complete = true;
 	dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
 		 kfd->pdev->device);
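With .num_of_watch_points = 4 in the device info, an address-watch request has to be validated against the per-ASIC limit before any registers are programmed. A hypothetical bounds-check sketch follows; validate_watch_request is not part of the amdkfd interface, only an illustration of the check the limit implies.

#include <errno.h>
#include <stdio.h>

#define KAVERI_NUM_OF_WATCH_POINTS 4	/* from kaveri_device_info above */

/* hypothetical helper: reject requests outside the ASIC's watch-point limit */
static int validate_watch_request(unsigned int num_watch_points)
{
	if (num_watch_points == 0 ||
	    num_watch_points > KAVERI_NUM_OF_WATCH_POINTS)
		return -EINVAL;
	return 0;
}

int main(void)
{
	printf("%d\n", validate_watch_request(4));	/* 0 */
	printf("%d\n", validate_watch_request(5));	/* -EINVAL */
	return 0;
}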

+ 39 - 9
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+				bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 
 	BUG_ON(!dqm);
 
-	destroy_queues_cpsch(dqm, true);
+	destroy_queues_cpsch(dqm, true, true);
 
 	list_for_each_entry(node, &dqm->queues, list) {
 		pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 	pr_debug("kfd: In %s\n", __func__);
 
 	mutex_lock(&dqm->lock);
-	destroy_queues_cpsch(dqm, false);
+	/* here we actually preempt the DIQ */
+	destroy_queues_cpsch(dqm, true, false);
 	list_del(&kq->list);
 	dqm->queue_count--;
 	qpd->is_debug = false;
@@ -913,7 +915,7 @@ out:
 	return retval;
 }
 
-static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 				unsigned int fence_value,
 				unsigned long timeout)
 {
@@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
 				unsigned int sdma_engine)
 {
 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
 			sdma_engine);
 }
 
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+				bool preempt_static_queues, bool lock)
 {
 	int retval;
+	enum kfd_preempt_type_filter preempt_type;
+	struct kfd_process *p;
 
 	BUG_ON(!dqm);
 
@@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 		destroy_sdma_queues(dqm, 1);
 	}
 
+	preempt_type = preempt_static_queues ?
+			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
-			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+			preempt_type, 0, false, 0);
 	if (retval != 0)
 		goto out;
 
@@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
 				KFD_FENCE_COMPLETED);
 	/* should be timed out */
-	amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+	if (retval != 0) {
+		p = kfd_get_process(current);
+		p->reset_wavefronts = true;
+		goto out;
+	}
 	pm_release_ib(&dqm->packets);
 	dqm->active_runlist = false;
 
@@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 	if (lock)
 		mutex_lock(&dqm->lock);
 
-	retval = destroy_queues_cpsch(dqm, false);
+	retval = destroy_queues_cpsch(dqm, false, false);
 	if (retval != 0) {
 		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		goto out;
@@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 {
 	int retval;
 	struct mqd_manager *mqd;
+	bool preempt_all_queues;
 
 	BUG_ON(!dqm || !qpd || !q);
 
+	preempt_all_queues = false;
+
 	retval = 0;
 
 	/* remove queue from list to prevent rescheduling after preemption */
 	mutex_lock(&dqm->lock);
+
+	if (qpd->is_debug) {
+		/*
+		 * error: we currently do not allow destroying a queue
+		 * of a process that is being debugged
+		 */
+		retval = -EBUSY;
+		goto failed_try_destroy_debugged_queue;
+	}
+
 	mqd = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
 	if (!mqd) {
@@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	return 0;
 
 failed:
+failed_try_destroy_debugged_queue:
+
 	mutex_unlock(&dqm->lock);
 	return retval;
 }

Too many files changed in this diff, so some files are not shown.