Browse Source

Merge branch 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull irq updates from Thomas Gleixner:
 "The irq department delivers:

   - Expand the generic infrastructure handling the irq migration on CPU
     hotplug and convert X86 over to it. (Thomas Gleixner)

     Aside of consolidating code this is a preparatory change for:

   - Finalizing the affinity management for multi-queue devices. The
     main change here is to shut down interrupts which are affine to a
     outgoing CPU and reenabling them when the CPU comes online again.
     That avoids moving interrupts pointlessly around and breaking and
     reestablishing affinities for no value. (Christoph Hellwig)

     Note: This contains also the BLOCK-MQ and NVME changes which depend
     on the rework of the irq core infrastructure. Jens acked them and
     agreed that they should go with the irq changes.

   - Consolidation of irq domain code (Marc Zyngier)

   - State tracking consolidation in the core code (Jeffy Chen)

   - Add debug infrastructure for hierarchical irq domains (Thomas
     Gleixner)

   - Infrastructure enhancement for managing generic interrupt chips via
     devmem (Bartosz Golaszewski)

   - Constification work all over the place (Tobias Klauser)

   - Two new interrupt controller drivers for MVEBU (Thomas Petazzoni)

   - The usual set of fixes, updates and enhancements all over the
     place"

* 'irq-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (112 commits)
  irqchip/or1k-pic: Fix interrupt acknowledgement
  irqchip/irq-mvebu-gicp: Allocate enough memory for spi_bitmap
  irqchip/gic-v3: Fix out-of-bound access in gic_set_affinity
  nvme: Allocate queues for all possible CPUs
  blk-mq: Create hctx for each present CPU
  blk-mq: Include all present CPUs in the default queue mapping
  genirq: Avoid unnecessary low level irq function calls
  genirq: Set irq masked state when initializing irq_desc
  genirq/timings: Add infrastructure for estimating the next interrupt arrival time
  genirq/timings: Add infrastructure to track the interrupt timings
  genirq/debugfs: Remove pointless NULL pointer check
  irqchip/gic-v3-its: Don't assume GICv3 hardware supports 16bit INTID
  irqchip/gic-v3-its: Add ACPI NUMA node mapping
  irqchip/gic-v3-its-platform-msi: Make of_device_ids const
  irqchip/gic-v3-its: Make of_device_ids const
  irqchip/irq-mvebu-icu: Add new driver for Marvell ICU
  irqchip/irq-mvebu-gicp: Add new driver for Marvell GICP
  dt-bindings/interrupt-controller: Add DT binding for the Marvell ICU
  genirq/irqdomain: Remove auto-recursive hierarchy support
  irqchip/MSI: Use irq_domain_update_bus_token instead of an open coded access
  ...
Linus Torvalds 8 years ago
parent
commit
03ffbcdd78
86 changed files with 3342 additions and 709 deletions
  1. 39 2
      Documentation/IRQ-domain.txt
  2. 5 2
      Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt
  3. 25 0
      Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt
  4. 5 4
      Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt
  5. 27 0
      Documentation/devicetree/bindings/interrupt-controller/marvell,gicp.txt
  6. 51 0
      Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt
  7. 2 0
      Documentation/driver-model/devres.txt
  8. 2 0
      arch/x86/Kconfig
  9. 11 25
      arch/x86/include/asm/apic.h
  10. 0 1
      arch/x86/include/asm/irq.h
  11. 2 1
      arch/x86/include/asm/irq_remapping.h
  12. 22 13
      arch/x86/kernel/apic/apic.c
  13. 2 2
      arch/x86/kernel/apic/apic_flat_64.c
  14. 1 1
      arch/x86/kernel/apic/apic_noop.c
  15. 2 2
      arch/x86/kernel/apic/apic_numachip.c
  16. 1 1
      arch/x86/kernel/apic/bigsmp_32.c
  17. 15 4
      arch/x86/kernel/apic/htirq.c
  18. 20 2
      arch/x86/kernel/apic/io_apic.c
  19. 43 10
      arch/x86/kernel/apic/msi.c
  20. 1 1
      arch/x86/kernel/apic/probe_32.c
  21. 35 14
      arch/x86/kernel/apic/vector.c
  22. 16 20
      arch/x86/kernel/apic/x2apic_cluster.c
  23. 1 1
      arch/x86/kernel/apic/x2apic_phys.c
  24. 7 19
      arch/x86/kernel/apic/x2apic_uv_x.c
  25. 2 76
      arch/x86/kernel/irq.c
  26. 13 5
      arch/x86/platform/uv/uv_irq.c
  27. 1 1
      arch/x86/xen/apic.c
  28. 4 6
      block/blk-mq-cpumap.c
  29. 11 109
      block/blk-mq.c
  30. 0 5
      block/blk-mq.h
  31. 1 1
      drivers/base/platform-msi.c
  32. 15 7
      drivers/iommu/amd_iommu.c
  33. 21 10
      drivers/iommu/intel_irq_remapping.c
  34. 6 0
      drivers/irqchip/Kconfig
  35. 3 1
      drivers/irqchip/Makefile
  36. 130 18
      drivers/irqchip/irq-armada-370-xp.c
  37. 115 0
      drivers/irqchip/irq-aspeed-i2c-ic.c
  38. 3 2
      drivers/irqchip/irq-aspeed-vic.c
  39. 1 1
      drivers/irqchip/irq-gic-v2m.c
  40. 17 18
      drivers/irqchip/irq-gic-v3-its-pci-msi.c
  41. 1 1
      drivers/irqchip/irq-gic-v3-its-platform-msi.c
  42. 93 20
      drivers/irqchip/irq-gic-v3-its.c
  43. 3 0
      drivers/irqchip/irq-gic-v3.c
  44. 1 1
      drivers/irqchip/irq-i8259.c
  45. 1 1
      drivers/irqchip/irq-imx-gpcv2.c
  46. 1 1
      drivers/irqchip/irq-mbigen.c
  47. 1 1
      drivers/irqchip/irq-mips-cpu.c
  48. 2 2
      drivers/irqchip/irq-mips-gic.c
  49. 279 0
      drivers/irqchip/irq-mvebu-gicp.c
  50. 11 0
      drivers/irqchip/irq-mvebu-gicp.h
  51. 289 0
      drivers/irqchip/irq-mvebu-icu.c
  52. 1 1
      drivers/irqchip/irq-or1k-pic.c
  53. 1 1
      drivers/irqchip/irq-renesas-h8300h.c
  54. 1 1
      drivers/irqchip/irq-renesas-h8s.c
  55. 54 14
      drivers/irqchip/irq-sunxi-nmi.c
  56. 1 6
      drivers/irqchip/qcom-irq-combiner.c
  57. 1 1
      drivers/nvme/host/pci.c
  58. 7 1
      drivers/pci/host/vmd.c
  59. 1 1
      drivers/pci/msi.c
  60. 1 1
      drivers/staging/fsl-mc/bus/fsl-mc-msi.c
  61. 5 1
      drivers/xen/events/events_base.c
  62. 15 0
      include/dt-bindings/interrupt-controller/mvebu-icu.h
  63. 1 1
      include/linux/cpuhotplug.h
  64. 6 0
      include/linux/interrupt.h
  65. 89 0
      include/linux/irq.h
  66. 4 0
      include/linux/irqdesc.h
  67. 39 4
      include/linux/irqdomain.h
  68. 5 0
      kernel/cpu.c
  69. 18 0
      kernel/irq/Kconfig
  70. 2 0
      kernel/irq/Makefile
  71. 63 13
      kernel/irq/affinity.c
  72. 2 2
      kernel/irq/autoprobe.c
  73. 157 38
      kernel/irq/chip.c
  74. 126 24
      kernel/irq/cpuhotplug.c
  75. 213 0
      kernel/irq/debugfs.c
  76. 86 0
      kernel/irq/devres.c
  77. 3 4
      kernel/irq/generic-chip.c
  78. 2 0
      kernel/irq/handle.c
  79. 223 2
      kernel/irq/internals.h
  80. 26 10
      kernel/irq/irqdesc.c
  81. 279 80
      kernel/irq/irqdomain.c
  82. 47 72
      kernel/irq/manage.c
  83. 30 0
      kernel/irq/migration.c
  84. 10 3
      kernel/irq/msi.c
  85. 94 16
      kernel/irq/proc.c
  86. 369 0
      kernel/irq/timings.c

+ 39 - 2
Documentation/IRQ-domain.txt

@@ -231,5 +231,42 @@ needs to:
 4) No need to implement irq_domain_ops.map and irq_domain_ops.unmap,
    they are unused with hierarchy irq_domain.
 
-Hierarchy irq_domain may also be used to support other architectures,
-such as ARM, ARM64 etc.
+Hierarchy irq_domain is in no way x86 specific, and is heavily used to
+support other architectures, such as ARM, ARM64 etc.
+
+=== Debugging ===
+
+If you switch on CONFIG_IRQ_DOMAIN_DEBUG (which depends on
+CONFIG_IRQ_DOMAIN and CONFIG_DEBUG_FS), you will find a new file in
+your debugfs mount point, called irq_domain_mapping. This file
+contains a live snapshot of all the IRQ domains in the system:
+
+ name              mapped  linear-max  direct-max  devtree-node
+ pl061                  8           8           0  /smb/gpio@e0080000
+ pl061                  8           8           0  /smb/gpio@e1050000
+ pMSI                   0           0           0  /interrupt-controller@e1101000/v2m@e0080000
+ MSI                   37           0           0  /interrupt-controller@e1101000/v2m@e0080000
+ GICv2m                37           0           0  /interrupt-controller@e1101000/v2m@e0080000
+ GICv2                448         448           0  /interrupt-controller@e1101000
+
+it also iterates over the interrupts to display their mapping in the
+domains, and makes the domain stacking visible:
+
+
+irq    hwirq    chip name        chip data           active  type            domain
+    1  0x00019  GICv2            0xffff00000916bfd8     *    LINEAR          GICv2
+    2  0x0001d  GICv2            0xffff00000916bfd8          LINEAR          GICv2
+    3  0x0001e  GICv2            0xffff00000916bfd8     *    LINEAR          GICv2
+    4  0x0001b  GICv2            0xffff00000916bfd8     *    LINEAR          GICv2
+    5  0x0001a  GICv2            0xffff00000916bfd8          LINEAR          GICv2
+[...]
+   96  0x81808  MSI              0x          (null)           RADIX          MSI
+   96+ 0x00063  GICv2m           0xffff8003ee116980           RADIX          GICv2m
+   96+ 0x00063  GICv2            0xffff00000916bfd8          LINEAR          GICv2
+   97  0x08800  MSI              0x          (null)     *     RADIX          MSI
+   97+ 0x00064  GICv2m           0xffff8003ee116980     *     RADIX          GICv2m
+   97+ 0x00064  GICv2            0xffff00000916bfd8     *    LINEAR          GICv2
+
+Here, interrupts 1-5 are only using a single domain, while 96 and 97
+are build out of a stack of three domain, each level performing a
+particular function.

+ 5 - 2
Documentation/devicetree/bindings/interrupt-controller/allwinner,sunxi-nmi.txt

@@ -3,8 +3,11 @@ Allwinner Sunxi NMI Controller
 
 Required properties:
 
-- compatible : should be "allwinner,sun7i-a20-sc-nmi" or
-  "allwinner,sun6i-a31-sc-nmi" or "allwinner,sun9i-a80-nmi"
+- compatible : should be one of the following:
+  - "allwinner,sun7i-a20-sc-nmi"
+  - "allwinner,sun6i-a31-sc-nmi" (deprecated)
+  - "allwinner,sun6i-a31-r-intc"
+  - "allwinner,sun9i-a80-nmi"
 - reg : Specifies base physical address and size of the registers.
 - interrupt-controller : Identifies the node as an interrupt controller
 - #interrupt-cells : Specifies the number of cells needed to encode an

+ 25 - 0
Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-i2c-ic.txt

@@ -0,0 +1,25 @@
+Device tree configuration for the I2C Interrupt Controller on the AST24XX and
+AST25XX SoCs.
+
+Required Properties:
+- #address-cells	: should be 1
+- #size-cells 		: should be 1
+- #interrupt-cells 	: should be 1
+- compatible 		: should be "aspeed,ast2400-i2c-ic"
+			  or "aspeed,ast2500-i2c-ic"
+- reg			: address start and range of controller
+- interrupts		: interrupt number
+- interrupt-controller	: denotes that the controller receives and fires
+			  new interrupts for child busses
+
+Example:
+
+i2c_ic: interrupt-controller@0 {
+	#address-cells = <1>;
+	#size-cells = <1>;
+	#interrupt-cells = <1>;
+	compatible = "aspeed,ast2400-i2c-ic";
+	reg = <0x0 0x40>;
+	interrupts = <12>;
+	interrupt-controller;
+};

+ 5 - 4
Documentation/devicetree/bindings/interrupt-controller/aspeed,ast2400-vic.txt

@@ -1,12 +1,13 @@
 Aspeed Vectored Interrupt Controller
 
-These bindings are for the Aspeed AST2400 interrupt controller register layout.
-The SoC has an legacy register layout, but this driver does not support that
-mode of operation.
+These bindings are for the Aspeed interrupt controller. The AST2400 and
+AST2500 SoC families include a legacy register layout before a re-designed
+layout, but the bindings do not prescribe the use of one or the other.
 
 Required properties:
 
-- compatible : should be "aspeed,ast2400-vic".
+- compatible : "aspeed,ast2400-vic"
+               "aspeed,ast2500-vic"
 
 - interrupt-controller : Identifies the node as an interrupt controller
 - #interrupt-cells : Specifies the number of cells needed to encode an

+ 27 - 0
Documentation/devicetree/bindings/interrupt-controller/marvell,gicp.txt

@@ -0,0 +1,27 @@
+Marvell GICP Controller
+-----------------------
+
+GICP is a Marvell extension of the GIC that allows to trigger GIC SPI
+interrupts by doing a memory transaction. It is used by the ICU
+located in the Marvell CP110 to turn wired interrupts inside the CP
+into GIC SPI interrupts.
+
+Required properties:
+
+- compatible: Must be "marvell,ap806-gicp"
+
+- reg: Must be the address and size of the GICP SPI registers
+
+- marvell,spi-ranges: tuples of GIC SPI interrupts ranges available
+  for this GICP
+
+- msi-controller: indicates that this is an MSI controller
+
+Example:
+
+gicp_spi: gicp-spi@3f0040 {
+	compatible = "marvell,ap806-gicp";
+	reg = <0x3f0040 0x10>;
+	marvell,spi-ranges = <64 64>, <288 64>;
+	msi-controller;
+};

+ 51 - 0
Documentation/devicetree/bindings/interrupt-controller/marvell,icu.txt

@@ -0,0 +1,51 @@
+Marvell ICU Interrupt Controller
+--------------------------------
+
+The Marvell ICU (Interrupt Consolidation Unit) controller is
+responsible for collecting all wired-interrupt sources in the CP and
+communicating them to the GIC in the AP, the unit translates interrupt
+requests on input wires to MSG memory mapped transactions to the GIC.
+
+Required properties:
+
+- compatible: Should be "marvell,cp110-icu"
+
+- reg: Should contain ICU registers location and length.
+
+- #interrupt-cells: Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 3.
+
+  The 1st cell is the group type of the ICU interrupt. Possible group
+  types are:
+
+   ICU_GRP_NSR (0x0) : Shared peripheral interrupt, non-secure
+   ICU_GRP_SR  (0x1) : Shared peripheral interrupt, secure
+   ICU_GRP_SEI (0x4) : System error interrupt
+   ICU_GRP_REI (0x5) : RAM error interrupt
+
+  The 2nd cell is the index of the interrupt in the ICU unit.
+
+  The 3rd cell is the type of the interrupt. See arm,gic.txt for
+  details.
+
+- interrupt-controller: Identifies the node as an interrupt
+  controller.
+
+- msi-parent: Should point to the GICP controller, the GIC extension
+  that allows to trigger interrupts using MSG memory mapped
+  transactions.
+
+Example:
+
+icu: interrupt-controller@1e0000 {
+	compatible = "marvell,cp110-icu";
+	reg = <0x1e0000 0x10>;
+	#interrupt-cells = <3>;
+	interrupt-controller;
+	msi-parent = <&gicp>;
+};
+
+usb3h0: usb3@500000 {
+	interrupt-parent = <&icu>;
+	interrupts = <ICU_GRP_NSR 106 IRQ_TYPE_LEVEL_HIGH>;
+};

+ 2 - 0
Documentation/driver-model/devres.txt

@@ -311,6 +311,8 @@ IRQ
   devm_irq_alloc_desc_at()
   devm_irq_alloc_desc_from()
   devm_irq_alloc_descs_from()
+  devm_irq_alloc_generic_chip()
+  devm_irq_setup_generic_chip()
 
 LED
   devm_led_classdev_register()

+ 2 - 0
arch/x86/Kconfig

@@ -87,6 +87,8 @@ config X86
 	select GENERIC_EARLY_IOREMAP
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_IOMAP
+	select GENERIC_IRQ_EFFECTIVE_AFF_MASK	if SMP
+	select GENERIC_IRQ_MIGRATION		if SMP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
 	select GENERIC_PENDING_IRQ		if SMP

+ 11 - 25
arch/x86/include/asm/apic.h

@@ -252,6 +252,8 @@ static inline int x2apic_enabled(void) { return 0; }
 #define	x2apic_supported()	(0)
 #endif /* !CONFIG_X86_X2APIC */
 
+struct irq_data;
+
 /*
  * Copyright 2004 James Cleverdon, IBM.
  * Subject to the GNU Public License, v.2
@@ -296,9 +298,9 @@ struct apic {
 	/* Can't be NULL on 64-bit */
 	unsigned long (*set_apic_id)(unsigned int id);
 
-	int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask,
-				      const struct cpumask *andmask,
-				      unsigned int *apicid);
+	int (*cpu_mask_to_apicid)(const struct cpumask *cpumask,
+				  struct irq_data *irqdata,
+				  unsigned int *apicid);
 
 	/* ipi */
 	void (*send_IPI)(int cpu, int vector);
@@ -540,28 +542,12 @@ static inline int default_phys_pkg_id(int cpuid_apic, int index_msb)
 
 #endif
 
-static inline int
-flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			    const struct cpumask *andmask,
-			    unsigned int *apicid)
-{
-	unsigned long cpu_mask = cpumask_bits(cpumask)[0] &
-				 cpumask_bits(andmask)[0] &
-				 cpumask_bits(cpu_online_mask)[0] &
-				 APIC_ALL_CPUS;
-
-	if (likely(cpu_mask)) {
-		*apicid = (unsigned int)cpu_mask;
-		return 0;
-	} else {
-		return -EINVAL;
-	}
-}
-
-extern int
-default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			       const struct cpumask *andmask,
-			       unsigned int *apicid);
+extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask,
+				   struct irq_data *irqdata,
+				   unsigned int *apicid);
+extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask,
+				      struct irq_data *irqdata,
+				      unsigned int *apicid);
 
 static inline void
 flat_vector_allocation_domain(int cpu, struct cpumask *retmask,

+ 0 - 1
arch/x86/include/asm/irq.h

@@ -29,7 +29,6 @@ struct irq_desc;
 #include <linux/cpumask.h>
 extern int check_irq_vectors_for_cpu_disable(void);
 extern void fixup_irqs(void);
-extern void irq_force_complete_move(struct irq_desc *desc);
 #endif
 
 #ifdef CONFIG_HAVE_KVM

+ 2 - 1
arch/x86/include/asm/irq_remapping.h

@@ -55,7 +55,8 @@ extern struct irq_domain *
 irq_remapping_get_irq_domain(struct irq_alloc_info *info);
 
 /* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */
-extern struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent);
+extern struct irq_domain *
+arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id);
 
 /* Get parent irqdomain for interrupt remapping irqdomain */
 static inline struct irq_domain *arch_get_ir_parent_domain(void)

+ 22 - 13
arch/x86/kernel/apic/apic.c

@@ -2201,23 +2201,32 @@ void default_init_apic_ldr(void)
 	apic_write(APIC_LDR, val);
 }
 
-int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-				   const struct cpumask *andmask,
-				   unsigned int *apicid)
+int default_cpu_mask_to_apicid(const struct cpumask *mask,
+			       struct irq_data *irqdata,
+			       unsigned int *apicid)
 {
-	unsigned int cpu;
+	unsigned int cpu = cpumask_first(mask);
 
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+	*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
+	return 0;
+}
 
-	if (likely(cpu < nr_cpu_ids)) {
-		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
-		return 0;
-	}
+int flat_cpu_mask_to_apicid(const struct cpumask *mask,
+			    struct irq_data *irqdata,
+			    unsigned int *apicid)
 
-	return -EINVAL;
+{
+	struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
+	unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
+
+	if (!cpu_mask)
+		return -EINVAL;
+	*apicid = (unsigned int)cpu_mask;
+	cpumask_bits(effmsk)[0] = cpu_mask;
+	return 0;
 }
 
 /*

+ 2 - 2
arch/x86/kernel/apic/apic_flat_64.c

@@ -172,7 +172,7 @@ static struct apic apic_flat __ro_after_init = {
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
 
-	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 
 	.send_IPI			= default_send_IPI_single,
 	.send_IPI_mask			= flat_send_IPI_mask,
@@ -268,7 +268,7 @@ static struct apic apic_physflat __ro_after_init = {
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
 
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 
 	.send_IPI			= default_send_IPI_single_phys,
 	.send_IPI_mask			= default_send_IPI_mask_sequence_phys,

+ 1 - 1
arch/x86/kernel/apic/apic_noop.c

@@ -141,7 +141,7 @@ struct apic apic_noop __ro_after_init = {
 	.get_apic_id			= noop_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 
 	.send_IPI			= noop_send_IPI,
 	.send_IPI_mask			= noop_send_IPI_mask,

+ 2 - 2
arch/x86/kernel/apic/apic_numachip.c

@@ -267,7 +267,7 @@ static const struct apic apic_numachip1 __refconst = {
 	.get_apic_id			= numachip1_get_apic_id,
 	.set_apic_id			= numachip1_set_apic_id,
 
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 
 	.send_IPI			= numachip_send_IPI_one,
 	.send_IPI_mask			= numachip_send_IPI_mask,
@@ -318,7 +318,7 @@ static const struct apic apic_numachip2 __refconst = {
 	.get_apic_id			= numachip2_get_apic_id,
 	.set_apic_id			= numachip2_set_apic_id,
 
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 
 	.send_IPI			= numachip_send_IPI_one,
 	.send_IPI_mask			= numachip_send_IPI_mask,

+ 1 - 1
arch/x86/kernel/apic/bigsmp_32.c

@@ -172,7 +172,7 @@ static struct apic apic_bigsmp __ro_after_init = {
 	.get_apic_id			= bigsmp_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 
 	.send_IPI			= default_send_IPI_single_phys,
 	.send_IPI_mask			= default_send_IPI_mask_sequence_phys,

+ 15 - 4
arch/x86/kernel/apic/htirq.c

@@ -152,14 +152,25 @@ static const struct irq_domain_ops htirq_domain_ops = {
 
 void __init arch_init_htirq_domain(struct irq_domain *parent)
 {
+	struct fwnode_handle *fn;
+
 	if (disable_apic)
 		return;
 
-	htirq_domain = irq_domain_add_tree(NULL, &htirq_domain_ops, NULL);
+	fn = irq_domain_alloc_named_fwnode("PCI-HT");
+	if (!fn)
+		goto warn;
+
+	htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
+	irq_domain_free_fwnode(fn);
 	if (!htirq_domain)
-		pr_warn("failed to initialize irqdomain for HTIRQ.\n");
-	else
-		htirq_domain->parent = parent;
+		goto warn;
+
+	htirq_domain->parent = parent;
+	return;
+
+warn:
+	pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
 }
 
 int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,

+ 20 - 2
arch/x86/kernel/apic/io_apic.c

@@ -2201,6 +2201,8 @@ static int mp_irqdomain_create(int ioapic)
 	struct ioapic *ip = &ioapics[ioapic];
 	struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
 	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+	struct fwnode_handle *fn;
+	char *name = "IO-APIC";
 
 	if (cfg->type == IOAPIC_DOMAIN_INVALID)
 		return 0;
@@ -2211,9 +2213,25 @@ static int mp_irqdomain_create(int ioapic)
 	parent = irq_remapping_get_ir_irq_domain(&info);
 	if (!parent)
 		parent = x86_vector_domain;
+	else
+		name = "IO-APIC-IR";
+
+	/* Handle device tree enumerated APICs proper */
+	if (cfg->dev) {
+		fn = of_node_to_fwnode(cfg->dev);
+	} else {
+		fn = irq_domain_alloc_named_id_fwnode(name, ioapic);
+		if (!fn)
+			return -ENOMEM;
+	}
+
+	ip->irqdomain = irq_domain_create_linear(fn, hwirqs, cfg->ops,
+						 (void *)(long)ioapic);
+
+	/* Release fw handle if it was allocated above */
+	if (!cfg->dev)
+		irq_domain_free_fwnode(fn);
 
-	ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
-					      (void *)(long)ioapic);
 	if (!ip->irqdomain)
 		return -ENOMEM;
 

+ 43 - 10
arch/x86/kernel/apic/msi.c

@@ -138,11 +138,18 @@ static struct msi_domain_info pci_msi_domain_info = {
 
 void __init arch_init_msi_domain(struct irq_domain *parent)
 {
+	struct fwnode_handle *fn;
+
 	if (disable_apic)
 		return;
 
-	msi_default_domain = pci_msi_create_irq_domain(NULL,
-					&pci_msi_domain_info, parent);
+	fn = irq_domain_alloc_named_fwnode("PCI-MSI");
+	if (fn) {
+		msi_default_domain =
+			pci_msi_create_irq_domain(fn, &pci_msi_domain_info,
+						  parent);
+		irq_domain_free_fwnode(fn);
+	}
 	if (!msi_default_domain)
 		pr_warn("failed to initialize irqdomain for MSI/MSI-x.\n");
 }
@@ -167,9 +174,18 @@ static struct msi_domain_info pci_msi_ir_domain_info = {
 	.handler_name	= "edge",
 };
 
-struct irq_domain *arch_create_msi_irq_domain(struct irq_domain *parent)
+struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent,
+						    const char *name, int id)
 {
-	return pci_msi_create_irq_domain(NULL, &pci_msi_ir_domain_info, parent);
+	struct fwnode_handle *fn;
+	struct irq_domain *d;
+
+	fn = irq_domain_alloc_named_id_fwnode(name, id);
+	if (!fn)
+		return NULL;
+	d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent);
+	irq_domain_free_fwnode(fn);
+	return d;
 }
 #endif
 
@@ -221,13 +237,20 @@ static struct irq_domain *dmar_get_irq_domain(void)
 {
 	static struct irq_domain *dmar_domain;
 	static DEFINE_MUTEX(dmar_lock);
+	struct fwnode_handle *fn;
 
 	mutex_lock(&dmar_lock);
-	if (dmar_domain == NULL)
-		dmar_domain = msi_create_irq_domain(NULL, &dmar_msi_domain_info,
+	if (dmar_domain)
+		goto out;
+
+	fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
+	if (fn) {
+		dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
 						    x86_vector_domain);
+		irq_domain_free_fwnode(fn);
+	}
+out:
 	mutex_unlock(&dmar_lock);
-
 	return dmar_domain;
 }
 
@@ -317,9 +340,10 @@ static struct msi_domain_info hpet_msi_domain_info = {
 
 struct irq_domain *hpet_create_irq_domain(int hpet_id)
 {
-	struct irq_domain *parent;
-	struct irq_alloc_info info;
 	struct msi_domain_info *domain_info;
+	struct irq_domain *parent, *d;
+	struct irq_alloc_info info;
+	struct fwnode_handle *fn;
 
 	if (x86_vector_domain == NULL)
 		return NULL;
@@ -340,7 +364,16 @@ struct irq_domain *hpet_create_irq_domain(int hpet_id)
 	else
 		hpet_msi_controller.name = "IR-HPET-MSI";
 
-	return msi_create_irq_domain(NULL, domain_info, parent);
+	fn = irq_domain_alloc_named_id_fwnode(hpet_msi_controller.name,
+					      hpet_id);
+	if (!fn) {
+		kfree(domain_info);
+		return NULL;
+	}
+
+	d = msi_create_irq_domain(fn, domain_info, parent);
+	irq_domain_free_fwnode(fn);
+	return d;
 }
 
 int hpet_assign_irq(struct irq_domain *domain, struct hpet_dev *dev,

+ 1 - 1
arch/x86/kernel/apic/probe_32.c

@@ -102,7 +102,7 @@ static struct apic apic_default __ro_after_init = {
 	.get_apic_id			= default_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 
 	.send_IPI			= default_send_IPI_single,
 	.send_IPI_mask			= default_send_IPI_mask_logical,

+ 35 - 14
arch/x86/kernel/apic/vector.c

@@ -103,7 +103,8 @@ static void free_apic_chip_data(struct apic_chip_data *data)
 }
 
 static int __assign_irq_vector(int irq, struct apic_chip_data *d,
-			       const struct cpumask *mask)
+			       const struct cpumask *mask,
+			       struct irq_data *irqdata)
 {
 	/*
 	 * NOTE! The local APIC isn't very good at handling
@@ -141,7 +142,7 @@ static int __assign_irq_vector(int irq, struct apic_chip_data *d,
 		/*
 		 * Clear the offline cpus from @vector_cpumask for searching
 		 * and verify whether the result overlaps with @mask. If true,
-		 * then the call to apic->cpu_mask_to_apicid_and() will
+		 * then the call to apic->cpu_mask_to_apicid() will
 		 * succeed as well. If not, no point in trying to find a
 		 * vector in this mask.
 		 */
@@ -221,34 +222,40 @@ success:
 	 * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail
 	 * as we already established, that mask & d->domain & cpu_online_mask
 	 * is not empty.
+	 *
+	 * vector_searchmask is a subset of d->domain and has the offline
+	 * cpus masked out.
 	 */
-	BUG_ON(apic->cpu_mask_to_apicid_and(mask, d->domain,
-					    &d->cfg.dest_apicid));
+	cpumask_and(vector_searchmask, vector_searchmask, mask);
+	BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata,
+					&d->cfg.dest_apicid));
 	return 0;
 }
 
 static int assign_irq_vector(int irq, struct apic_chip_data *data,
-			     const struct cpumask *mask)
+			     const struct cpumask *mask,
+			     struct irq_data *irqdata)
 {
 	int err;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, data, mask);
+	err = __assign_irq_vector(irq, data, mask, irqdata);
 	raw_spin_unlock_irqrestore(&vector_lock, flags);
 	return err;
 }
 
 static int assign_irq_vector_policy(int irq, int node,
 				    struct apic_chip_data *data,
-				    struct irq_alloc_info *info)
+				    struct irq_alloc_info *info,
+				    struct irq_data *irqdata)
 {
 	if (info && info->mask)
-		return assign_irq_vector(irq, data, info->mask);
+		return assign_irq_vector(irq, data, info->mask, irqdata);
 	if (node != NUMA_NO_NODE &&
-	    assign_irq_vector(irq, data, cpumask_of_node(node)) == 0)
+	    assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0)
 		return 0;
-	return assign_irq_vector(irq, data, apic->target_cpus());
+	return assign_irq_vector(irq, data, apic->target_cpus(), irqdata);
 }
 
 static void clear_irq_vector(int irq, struct apic_chip_data *data)
@@ -360,9 +367,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
 		irq_data->chip = &lapic_controller;
 		irq_data->chip_data = data;
 		irq_data->hwirq = virq + i;
-		err = assign_irq_vector_policy(virq + i, node, data, info);
+		err = assign_irq_vector_policy(virq + i, node, data, info,
+					       irq_data);
 		if (err)
 			goto error;
+		/*
+		 * If the apic destination mode is physical, then the
+		 * effective affinity is restricted to a single target
+		 * CPU. Mark the interrupt accordingly.
+		 */
+		if (!apic->irq_dest_mode)
+			irqd_set_single_target(irq_data);
 	}
 
 	return 0;
@@ -429,11 +444,16 @@ static inline void init_legacy_irqs(void) { }
 
 int __init arch_early_irq_init(void)
 {
+	struct fwnode_handle *fn;
+
 	init_legacy_irqs();
 
-	x86_vector_domain = irq_domain_add_tree(NULL, &x86_vector_domain_ops,
-						NULL);
+	fn = irq_domain_alloc_named_fwnode("VECTOR");
+	BUG_ON(!fn);
+	x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops,
+						   NULL);
 	BUG_ON(x86_vector_domain == NULL);
+	irq_domain_free_fwnode(fn);
 	irq_set_default_host(x86_vector_domain);
 
 	arch_init_msi_domain(x86_vector_domain);
@@ -529,11 +549,12 @@ static int apic_set_affinity(struct irq_data *irq_data,
 	if (!cpumask_intersects(dest, cpu_online_mask))
 		return -EINVAL;
 
-	err = assign_irq_vector(irq, data, dest);
+	err = assign_irq_vector(irq, data, dest, irq_data);
 	return err ? err : IRQ_SET_MASK_OK;
 }
 
 static struct irq_chip lapic_controller = {
+	.name			= "APIC",
 	.irq_ack		= apic_ack_edge,
 	.irq_set_affinity	= apic_set_affinity,
 	.irq_retrigger		= apic_retrigger_irq,

+ 16 - 20
arch/x86/kernel/apic/x2apic_cluster.c

@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/ctype.h>
 #include <linux/dmar.h>
+#include <linux/irq.h>
 #include <linux/cpu.h>
 
 #include <asm/smp.h>
@@ -104,35 +105,30 @@ static void x2apic_send_IPI_all(int vector)
 }
 
 static int
-x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			      const struct cpumask *andmask,
-			      unsigned int *apicid)
+x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
+			  unsigned int *apicid)
 {
+	struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
+	unsigned int cpu;
 	u32 dest = 0;
 	u16 cluster;
-	int i;
-
-	for_each_cpu_and(i, cpumask, andmask) {
-		if (!cpumask_test_cpu(i, cpu_online_mask))
-			continue;
-		dest = per_cpu(x86_cpu_to_logical_apicid, i);
-		cluster = x2apic_cluster(i);
-		break;
-	}
 
-	if (!dest)
+	cpu = cpumask_first(mask);
+	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
-	for_each_cpu_and(i, cpumask, andmask) {
-		if (!cpumask_test_cpu(i, cpu_online_mask))
-			continue;
-		if (cluster != x2apic_cluster(i))
+	dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
+	cluster = x2apic_cluster(cpu);
+
+	cpumask_clear(effmsk);
+	for_each_cpu(cpu, mask) {
+		if (cluster != x2apic_cluster(cpu))
 			continue;
-		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+		dest |= per_cpu(x86_cpu_to_logical_apicid, cpu);
+		cpumask_set_cpu(cpu, effmsk);
 	}
 
 	*apicid = dest;
-
 	return 0;
 }
 
@@ -256,7 +252,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = {
 	.get_apic_id			= x2apic_get_apic_id,
 	.set_apic_id			= x2apic_set_apic_id,
 
-	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= x2apic_cpu_mask_to_apicid,
 
 	.send_IPI			= x2apic_send_IPI,
 	.send_IPI_mask			= x2apic_send_IPI_mask,

+ 1 - 1
arch/x86/kernel/apic/x2apic_phys.c

@@ -127,7 +127,7 @@ static struct apic apic_x2apic_phys __ro_after_init = {
 	.get_apic_id			= x2apic_get_apic_id,
 	.set_apic_id			= x2apic_set_apic_id,
 
-	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
 
 	.send_IPI			= x2apic_send_IPI,
 	.send_IPI_mask			= x2apic_send_IPI_mask,

+ 7 - 19
arch/x86/kernel/apic/x2apic_uv_x.c

@@ -526,27 +526,15 @@ static void uv_init_apic_ldr(void)
 }
 
 static int
-uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
-			  const struct cpumask *andmask,
-			  unsigned int *apicid)
+uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata,
+		      unsigned int *apicid)
 {
-	int unsigned cpu;
+	int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid);
 
-	/*
-	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
-	 * May as well be the first.
-	 */
-	for_each_cpu_and(cpu, cpumask, andmask) {
-		if (cpumask_test_cpu(cpu, cpu_online_mask))
-			break;
-	}
-
-	if (likely(cpu < nr_cpu_ids)) {
-		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
-		return 0;
-	}
+	if (!ret)
+		*apicid |= uv_apicid_hibits;
 
-	return -EINVAL;
+	return ret;
 }
 
 static unsigned int x2apic_get_apic_id(unsigned long x)
@@ -614,7 +602,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = {
 	.get_apic_id			= x2apic_get_apic_id,
 	.set_apic_id			= set_apic_id,
 
-	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= uv_cpu_mask_to_apicid,
 
 	.send_IPI			= uv_send_IPI_one,
 	.send_IPI_mask			= uv_send_IPI_mask,

+ 2 - 76
arch/x86/kernel/irq.c

@@ -432,84 +432,12 @@ int check_irq_vectors_for_cpu_disable(void)
 /* A cpu has been removed from cpu_online_mask.  Reset irq affinities. */
 void fixup_irqs(void)
 {
-	unsigned int irq, vector;
-	static int warned;
+	unsigned int irr, vector;
 	struct irq_desc *desc;
 	struct irq_data *data;
 	struct irq_chip *chip;
-	int ret;
 
-	for_each_irq_desc(irq, desc) {
-		int break_affinity = 0;
-		int set_affinity = 1;
-		const struct cpumask *affinity;
-
-		if (!desc)
-			continue;
-		if (irq == 2)
-			continue;
-
-		/* interrupt's are disabled at this point */
-		raw_spin_lock(&desc->lock);
-
-		data = irq_desc_get_irq_data(desc);
-		affinity = irq_data_get_affinity_mask(data);
-		if (!irq_has_action(irq) || irqd_is_per_cpu(data) ||
-		    cpumask_subset(affinity, cpu_online_mask)) {
-			raw_spin_unlock(&desc->lock);
-			continue;
-		}
-
-		/*
-		 * Complete the irq move. This cpu is going down and for
-		 * non intr-remapping case, we can't wait till this interrupt
-		 * arrives at this cpu before completing the irq move.
-		 */
-		irq_force_complete_move(desc);
-
-		if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
-			break_affinity = 1;
-			affinity = cpu_online_mask;
-		}
-
-		chip = irq_data_get_irq_chip(data);
-		/*
-		 * The interrupt descriptor might have been cleaned up
-		 * already, but it is not yet removed from the radix tree
-		 */
-		if (!chip) {
-			raw_spin_unlock(&desc->lock);
-			continue;
-		}
-
-		if (!irqd_can_move_in_process_context(data) && chip->irq_mask)
-			chip->irq_mask(data);
-
-		if (chip->irq_set_affinity) {
-			ret = chip->irq_set_affinity(data, affinity, true);
-			if (ret == -ENOSPC)
-				pr_crit("IRQ %d set affinity failed because there are no available vectors.  The device assigned to this IRQ is unstable.\n", irq);
-		} else {
-			if (!(warned++))
-				set_affinity = 0;
-		}
-
-		/*
-		 * We unmask if the irq was not marked masked by the
-		 * core code. That respects the lazy irq disable
-		 * behaviour.
-		 */
-		if (!irqd_can_move_in_process_context(data) &&
-		    !irqd_irq_masked(data) && chip->irq_unmask)
-			chip->irq_unmask(data);
-
-		raw_spin_unlock(&desc->lock);
-
-		if (break_affinity && set_affinity)
-			pr_notice("Broke affinity for irq %i\n", irq);
-		else if (!set_affinity)
-			pr_notice("Cannot set affinity for irq %i\n", irq);
-	}
+	irq_migrate_all_off_this_cpu();
 
 	/*
 	 * We can remove mdelay() and then send spuriuous interrupts to
@@ -528,8 +456,6 @@ void fixup_irqs(void)
 	 * nothing else will touch it.
 	 */
 	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		unsigned int irr;
-
 		if (IS_ERR_OR_NULL(__this_cpu_read(vector_irq[vector])))
 			continue;
 

+ 13 - 5
arch/x86/platform/uv/uv_irq.c

@@ -160,13 +160,21 @@ static struct irq_domain *uv_get_irq_domain(void)
 {
 	static struct irq_domain *uv_domain;
 	static DEFINE_MUTEX(uv_lock);
+	struct fwnode_handle *fn;
 
 	mutex_lock(&uv_lock);
-	if (uv_domain == NULL) {
-		uv_domain = irq_domain_add_tree(NULL, &uv_domain_ops, NULL);
-		if (uv_domain)
-			uv_domain->parent = x86_vector_domain;
-	}
+	if (uv_domain)
+		goto out;
+
+	fn = irq_domain_alloc_named_fwnode("UV-CORE");
+	if (!fn)
+		goto out;
+
+	uv_domain = irq_domain_create_tree(fn, &uv_domain_ops, NULL);
+	irq_domain_free_fwnode(fn);
+	if (uv_domain)
+		uv_domain->parent = x86_vector_domain;
+out:
 	mutex_unlock(&uv_lock);
 
 	return uv_domain;

+ 1 - 1
arch/x86/xen/apic.c

@@ -178,7 +178,7 @@ static struct apic xen_pv_apic = {
 	.get_apic_id 			= xen_get_apic_id,
 	.set_apic_id 			= xen_set_apic_id, /* Can be NULL on 32-bit. */
 
-	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
 
 #ifdef CONFIG_SMP
 	.send_IPI_mask 			= xen_send_IPI_mask,

+ 4 - 6
block/blk-mq-cpumap.c

@@ -14,13 +14,12 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
-			      const struct cpumask *online_mask)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu)
 {
 	/*
 	 * Non online CPU will be mapped to queue index 0.
 	 */
-	if (!cpumask_test_cpu(cpu, online_mask))
+	if (!cpu_online(cpu))
 		return 0;
 	return cpu % nr_queues;
 }
@@ -40,7 +39,6 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 {
 	unsigned int *map = set->mq_map;
 	unsigned int nr_queues = set->nr_hw_queues;
-	const struct cpumask *online_mask = cpu_online_mask;
 	unsigned int cpu, first_sibling;
 
 	for_each_possible_cpu(cpu) {
@@ -51,11 +49,11 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 		 * performace optimizations.
 		 */
 		if (cpu < nr_queues) {
-			map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+			map[cpu] = cpu_to_queue_index(nr_queues, cpu);
 		} else {
 			first_sibling = get_first_sibling(cpu);
 			if (first_sibling == cpu)
-				map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+				map[cpu] = cpu_to_queue_index(nr_queues, cpu);
 			else
 				map[cpu] = map[first_sibling];
 		}

+ 11 - 109
block/blk-mq.c

@@ -37,9 +37,6 @@
 #include "blk-wbt.h"
 #include "blk-mq-sched.h"
 
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 
@@ -1984,8 +1981,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		INIT_LIST_HEAD(&__ctx->rq_list);
 		__ctx->queue = q;
 
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		/* If the cpu isn't present, the cpu is mapped to first hctx */
+		if (!cpu_present(i))
 			continue;
 
 		hctx = blk_mq_map_queue(q, i);
@@ -2028,8 +2025,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
 	}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q,
-			       const struct cpumask *online_mask)
+static void blk_mq_map_swqueue(struct request_queue *q)
 {
 	unsigned int i, hctx_idx;
 	struct blk_mq_hw_ctx *hctx;
@@ -2047,13 +2043,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 
 	/*
-	 * Map software to hardware queues
+	 * Map software to hardware queues.
+	 *
+	 * If the cpu isn't present, the cpu is mapped to first hctx.
 	 */
-	for_each_possible_cpu(i) {
-		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpumask_test_cpu(i, online_mask))
-			continue;
-
+	for_each_present_cpu(i) {
 		hctx_idx = q->mq_map[i];
 		/* unmapped hw queue can be remapped after CPU topo changed */
 		if (!set->tags[hctx_idx] &&
@@ -2363,16 +2357,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 		blk_queue_softirq_done(q, set->ops->complete);
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-
-	get_online_cpus();
-	mutex_lock(&all_q_mutex);
-
-	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
-	blk_mq_map_swqueue(q, cpu_online_mask);
-
-	mutex_unlock(&all_q_mutex);
-	put_online_cpus();
+	blk_mq_map_swqueue(q);
 
 	if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
 		int ret;
@@ -2398,18 +2384,12 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set	*set = q->tag_set;
 
-	mutex_lock(&all_q_mutex);
-	list_del_init(&q->all_q_node);
-	mutex_unlock(&all_q_mutex);
-
 	blk_mq_del_queue_tag_set(q);
-
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
-				const struct cpumask *online_mask)
+static void blk_mq_queue_reinit(struct request_queue *q)
 {
 	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
@@ -2422,76 +2402,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
 
-	blk_mq_map_swqueue(q, online_mask);
+	blk_mq_map_swqueue(q);
 
 	blk_mq_sysfs_register(q);
 	blk_mq_debugfs_register_hctxs(q);
 }
 
-/*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
-static struct cpumask cpuhp_online_new;
-
-static void blk_mq_queue_reinit_work(void)
-{
-	struct request_queue *q;
-
-	mutex_lock(&all_q_mutex);
-	/*
-	 * We need to freeze and reinit all existing queues.  Freezing
-	 * involves synchronous wait for an RCU grace period and doing it
-	 * one by one may take a long time.  Start freezing all queues in
-	 * one swoop and then wait for the completions so that freezing can
-	 * take place in parallel.
-	 */
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_freeze_queue_start(q);
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_freeze_queue_wait(q);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q, &cpuhp_online_new);
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_unfreeze_queue(q);
-
-	mutex_unlock(&all_q_mutex);
-}
-
-static int blk_mq_queue_reinit_dead(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
-/*
- * Before hotadded cpu starts handling requests, new mappings must be
- * established.  Otherwise, these requests in hw queue might never be
- * dispatched.
- *
- * For example, there is a single hw queue (hctx) and two CPU queues (ctx0
- * for CPU0, and ctx1 for CPU1).
- *
- * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
- * and set bit0 in pending bitmap as ctx1->index_hw is still zero.
- *
- * And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
- * in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
- * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
- * ignored.
- */
-static int blk_mq_queue_reinit_prepare(unsigned int cpu)
-{
-	cpumask_copy(&cpuhp_online_new, cpu_online_mask);
-	cpumask_set_cpu(cpu, &cpuhp_online_new);
-	blk_mq_queue_reinit_work();
-	return 0;
-}
-
 static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
@@ -2702,7 +2618,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_realloc_hw_ctxs(set, q);
-		blk_mq_queue_reinit(q, cpu_online_mask);
+		blk_mq_queue_reinit(q);
 	}
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
@@ -2918,24 +2834,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
 }
 EXPORT_SYMBOL_GPL(blk_mq_poll);
 
-void blk_mq_disable_hotplug(void)
-{
-	mutex_lock(&all_q_mutex);
-}
-
-void blk_mq_enable_hotplug(void)
-{
-	mutex_unlock(&all_q_mutex);
-}
-
 static int __init blk_mq_init(void)
 {
 	cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
 				blk_mq_hctx_notify_dead);
-
-	cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
-				  blk_mq_queue_reinit_prepare,
-				  blk_mq_queue_reinit_dead);
 	return 0;
 }
 subsys_initcall(blk_mq_init);

+ 0 - 5
block/blk-mq.h

@@ -56,11 +56,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 				bool at_head);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
-/*
- * CPU hotplug helpers
- */
-void blk_mq_enable_hotplug(void);
-void blk_mq_disable_hotplug(void);
 
 /*
  * CPU -> queue mappings

+ 1 - 1
drivers/base/platform-msi.c

@@ -195,7 +195,7 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
 
 	domain = msi_create_irq_domain(fwnode, info, parent);
 	if (domain)
-		domain->bus_token = DOMAIN_BUS_PLATFORM_MSI;
+		irq_domain_update_bus_token(domain, DOMAIN_BUS_PLATFORM_MSI);
 
 	return domain;
 }

+ 15 - 7
drivers/iommu/amd_iommu.c

@@ -4384,21 +4384,29 @@ static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
 }
 
 static struct irq_chip amd_ir_chip = {
-	.irq_ack = ir_ack_apic_edge,
-	.irq_set_affinity = amd_ir_set_affinity,
-	.irq_set_vcpu_affinity = amd_ir_set_vcpu_affinity,
-	.irq_compose_msi_msg = ir_compose_msi_msg,
+	.name			= "AMD-IR",
+	.irq_ack		= ir_ack_apic_edge,
+	.irq_set_affinity	= amd_ir_set_affinity,
+	.irq_set_vcpu_affinity	= amd_ir_set_vcpu_affinity,
+	.irq_compose_msi_msg	= ir_compose_msi_msg,
 };
 
 int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 {
-	iommu->ir_domain = irq_domain_add_tree(NULL, &amd_ir_domain_ops, iommu);
+	struct fwnode_handle *fn;
+
+	fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index);
+	if (!fn)
+		return -ENOMEM;
+	iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu);
+	irq_domain_free_fwnode(fn);
 	if (!iommu->ir_domain)
 		return -ENOMEM;
 
 	iommu->ir_domain->parent = arch_get_ir_parent_domain();
-	iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
-
+	iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain,
+							     "AMD-IR-MSI",
+							     iommu->index);
 	return 0;
 }
 

+ 21 - 10
drivers/iommu/intel_irq_remapping.c

@@ -500,8 +500,9 @@ static void iommu_enable_irq_remapping(struct intel_iommu *iommu)
 static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 {
 	struct ir_table *ir_table;
-	struct page *pages;
+	struct fwnode_handle *fn;
 	unsigned long *bitmap;
+	struct page *pages;
 
 	if (iommu->ir_table)
 		return 0;
@@ -525,15 +526,24 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 		goto out_free_pages;
 	}
 
-	iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(),
-						    0, INTR_REMAP_TABLE_ENTRIES,
-						    NULL, &intel_ir_domain_ops,
-						    iommu);
+	fn = irq_domain_alloc_named_id_fwnode("INTEL-IR", iommu->seq_id);
+	if (!fn)
+		goto out_free_bitmap;
+
+	iommu->ir_domain =
+		irq_domain_create_hierarchy(arch_get_ir_parent_domain(),
+					    0, INTR_REMAP_TABLE_ENTRIES,
+					    fn, &intel_ir_domain_ops,
+					    iommu);
+	irq_domain_free_fwnode(fn);
 	if (!iommu->ir_domain) {
 		pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
 		goto out_free_bitmap;
 	}
-	iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
+	iommu->ir_msi_domain =
+		arch_create_remap_msi_irq_domain(iommu->ir_domain,
+						 "INTEL-IR-MSI",
+						 iommu->seq_id);
 
 	ir_table->base = page_address(pages);
 	ir_table->bitmap = bitmap;
@@ -1205,10 +1215,11 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
 }
 
 static struct irq_chip intel_ir_chip = {
-	.irq_ack = ir_ack_apic_edge,
-	.irq_set_affinity = intel_ir_set_affinity,
-	.irq_compose_msi_msg = intel_ir_compose_msi_msg,
-	.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
+	.name			= "INTEL-IR",
+	.irq_ack		= ir_ack_apic_edge,
+	.irq_set_affinity	= intel_ir_set_affinity,
+	.irq_compose_msi_msg	= intel_ir_compose_msi_msg,
+	.irq_set_vcpu_affinity	= intel_ir_set_vcpu_affinity,
 };
 
 static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,

+ 6 - 0
drivers/irqchip/Kconfig

@@ -268,6 +268,12 @@ config IRQ_MXS
 	select IRQ_DOMAIN
 	select STMP_DEVICE
 
+config MVEBU_GICP
+	bool
+
+config MVEBU_ICU
+	bool
+
 config MVEBU_ODMI
 	bool
 	select GENERIC_MSI_IRQ_DOMAIN

+ 3 - 1
drivers/irqchip/Makefile

@@ -69,10 +69,12 @@ obj-$(CONFIG_ARCH_SA1100)		+= irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)		+= irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)			+= irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)		+= irq-pic32-evic.o
+obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
+obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
 obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
 obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
-obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o
+obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o irq-aspeed-i2c-ic.o
 obj-$(CONFIG_STM32_EXTI) 		+= irq-stm32-exti.o
 obj-$(CONFIG_QCOM_IRQ_COMBINER)		+= qcom-irq-combiner.o

+ 130 - 18
drivers/irqchip/irq-armada-370-xp.c

@@ -34,25 +34,104 @@
 #include <asm/smp_plat.h>
 #include <asm/mach/irq.h>
 
-/* Interrupt Controller Registers Map */
-#define ARMADA_370_XP_INT_SET_MASK_OFFS		(0x48)
-#define ARMADA_370_XP_INT_CLEAR_MASK_OFFS	(0x4C)
-#define ARMADA_370_XP_INT_FABRIC_MASK_OFFS	(0x54)
-#define ARMADA_370_XP_INT_CAUSE_PERF(cpu)	(1 << cpu)
+/*
+ * Overall diagram of the Armada XP interrupt controller:
+ *
+ *    To CPU 0                 To CPU 1
+ *
+ *       /\                       /\
+ *       ||                       ||
+ * +---------------+     +---------------+
+ * |               |	 |               |
+ * |    per-CPU    |	 |    per-CPU    |
+ * |  mask/unmask  |	 |  mask/unmask  |
+ * |     CPU0      |	 |     CPU1      |
+ * |               |	 |               |
+ * +---------------+	 +---------------+
+ *        /\                       /\
+ *        ||                       ||
+ *        \\_______________________//
+ *                     ||
+ *            +-------------------+
+ *            |                   |
+ *            | Global interrupt  |
+ *            |    mask/unmask    |
+ *            |                   |
+ *            +-------------------+
+ *                     /\
+ *                     ||
+ *               interrupt from
+ *                   device
+ *
+ * The "global interrupt mask/unmask" is modified using the
+ * ARMADA_370_XP_INT_SET_ENABLE_OFFS and
+ * ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS registers, which are relative
+ * to "main_int_base".
+ *
+ * The "per-CPU mask/unmask" is modified using the
+ * ARMADA_370_XP_INT_SET_MASK_OFFS and
+ * ARMADA_370_XP_INT_CLEAR_MASK_OFFS registers, which are relative to
+ * "per_cpu_int_base". This base address points to a special address,
+ * which automatically accesses the registers of the current CPU.
+ *
+ * The per-CPU mask/unmask can also be adjusted using the global
+ * per-interrupt ARMADA_370_XP_INT_SOURCE_CTL register, which we use
+ * to configure interrupt affinity.
+ *
+ * Due to this model, all interrupts need to be mask/unmasked at two
+ * different levels: at the global level and at the per-CPU level.
+ *
+ * This driver takes the following approach to deal with this:
+ *
+ *  - For global interrupts:
+ *
+ *    At ->map() time, a global interrupt is unmasked at the per-CPU
+ *    mask/unmask level. It is therefore unmasked at this level for
+ *    the current CPU, running the ->map() code. This allows to have
+ *    the interrupt unmasked at this level in non-SMP
+ *    configurations. In SMP configurations, the ->set_affinity()
+ *    callback is called, which using the
+ *    ARMADA_370_XP_INT_SOURCE_CTL() readjusts the per-CPU mask/unmask
+ *    for the interrupt.
+ *
+ *    The ->mask() and ->unmask() operations only mask/unmask the
+ *    interrupt at the "global" level.
+ *
+ *    So, a global interrupt is enabled at the per-CPU level as soon
+ *    as it is mapped. At run time, the masking/unmasking takes place
+ *    at the global level.
+ *
+ *  - For per-CPU interrupts
+ *
+ *    At ->map() time, a per-CPU interrupt is unmasked at the global
+ *    mask/unmask level.
+ *
+ *    The ->mask() and ->unmask() operations mask/unmask the interrupt
+ *    at the per-CPU level.
+ *
+ *    So, a per-CPU interrupt is enabled at the global level as soon
+ *    as it is mapped. At run time, the masking/unmasking takes place
+ *    at the per-CPU level.
+ */
 
+/* Registers relative to main_int_base */
 #define ARMADA_370_XP_INT_CONTROL		(0x00)
+#define ARMADA_370_XP_SW_TRIG_INT_OFFS		(0x04)
 #define ARMADA_370_XP_INT_SET_ENABLE_OFFS	(0x30)
 #define ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS	(0x34)
 #define ARMADA_370_XP_INT_SOURCE_CTL(irq)	(0x100 + irq*4)
 #define ARMADA_370_XP_INT_SOURCE_CPU_MASK	0xF
 #define ARMADA_370_XP_INT_IRQ_FIQ_MASK(cpuid)	((BIT(0) | BIT(8)) << cpuid)
 
-#define ARMADA_370_XP_CPU_INTACK_OFFS		(0x44)
+/* Registers relative to per_cpu_int_base */
+#define ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS	(0x08)
+#define ARMADA_370_XP_IN_DRBEL_MSK_OFFS		(0x0c)
 #define ARMADA_375_PPI_CAUSE			(0x10)
-
-#define ARMADA_370_XP_SW_TRIG_INT_OFFS           (0x4)
-#define ARMADA_370_XP_IN_DRBEL_MSK_OFFS          (0xc)
-#define ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS        (0x8)
+#define ARMADA_370_XP_CPU_INTACK_OFFS		(0x44)
+#define ARMADA_370_XP_INT_SET_MASK_OFFS		(0x48)
+#define ARMADA_370_XP_INT_CLEAR_MASK_OFFS	(0x4C)
+#define ARMADA_370_XP_INT_FABRIC_MASK_OFFS	(0x54)
+#define ARMADA_370_XP_INT_CAUSE_PERF(cpu)	(1 << cpu)
 
 #define ARMADA_370_XP_MAX_PER_CPU_IRQS		(28)
 
@@ -281,13 +360,11 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h,
 		irq_set_percpu_devid(virq);
 		irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
 					handle_percpu_devid_irq);
-
 	} else {
 		irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip,
 					handle_level_irq);
 	}
 	irq_set_probe(virq);
-	irq_clear_status_flags(virq, IRQ_NOAUTOEN);
 
 	return 0;
 }
@@ -345,16 +422,40 @@ static void armada_mpic_send_doorbell(const struct cpumask *mask,
 		ARMADA_370_XP_SW_TRIG_INT_OFFS);
 }
 
+static void armada_xp_mpic_reenable_percpu(void)
+{
+	unsigned int irq;
+
+	/* Re-enable per-CPU interrupts that were enabled before suspend */
+	for (irq = 0; irq < ARMADA_370_XP_MAX_PER_CPU_IRQS; irq++) {
+		struct irq_data *data;
+		int virq;
+
+		virq = irq_linear_revmap(armada_370_xp_mpic_domain, irq);
+		if (virq == 0)
+			continue;
+
+		data = irq_get_irq_data(virq);
+
+		if (!irq_percpu_is_enabled(virq))
+			continue;
+
+		armada_370_xp_irq_unmask(data);
+	}
+}
+
 static int armada_xp_mpic_starting_cpu(unsigned int cpu)
 {
 	armada_xp_mpic_perf_init();
 	armada_xp_mpic_smp_cpu_init();
+	armada_xp_mpic_reenable_percpu();
 	return 0;
 }
 
 static int mpic_cascaded_starting_cpu(unsigned int cpu)
 {
 	armada_xp_mpic_perf_init();
+	armada_xp_mpic_reenable_percpu();
 	enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
 	return 0;
 }
@@ -502,16 +603,27 @@ static void armada_370_xp_mpic_resume(void)
 		if (virq == 0)
 			continue;
 
-		if (!is_percpu_irq(irq))
+		data = irq_get_irq_data(virq);
+
+		if (!is_percpu_irq(irq)) {
+			/* Non per-CPU interrupts */
 			writel(irq, per_cpu_int_base +
 			       ARMADA_370_XP_INT_CLEAR_MASK_OFFS);
-		else
+			if (!irqd_irq_disabled(data))
+				armada_370_xp_irq_unmask(data);
+		} else {
+			/* Per-CPU interrupts */
 			writel(irq, main_int_base +
 			       ARMADA_370_XP_INT_SET_ENABLE_OFFS);
 
-		data = irq_get_irq_data(virq);
-		if (!irqd_irq_disabled(data))
-			armada_370_xp_irq_unmask(data);
+			/*
+			 * Re-enable on the current CPU,
+			 * armada_xp_mpic_reenable_percpu() will take
+			 * care of secondary CPUs when they come up.
+			 */
+			if (irq_percpu_is_enabled(virq))
+				armada_370_xp_irq_unmask(data);
+		}
 	}
 
 	/* Reconfigure doorbells for IPIs and MSIs */
@@ -563,7 +675,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
 		irq_domain_add_linear(node, nr_irqs,
 				&armada_370_xp_mpic_irq_ops, NULL);
 	BUG_ON(!armada_370_xp_mpic_domain);
-	armada_370_xp_mpic_domain->bus_token = DOMAIN_BUS_WIRED;
+	irq_domain_update_bus_token(armada_370_xp_mpic_domain, DOMAIN_BUS_WIRED);
 
 	/* Setup for the boot CPU */
 	armada_xp_mpic_perf_init();

+ 115 - 0
drivers/irqchip/irq-aspeed-i2c-ic.c

@@ -0,0 +1,115 @@
+/*
+ *  Aspeed 24XX/25XX I2C Interrupt Controller.
+ *
+ *  Copyright (C) 2012-2017 ASPEED Technology Inc.
+ *  Copyright 2017 IBM Corporation
+ *  Copyright 2017 Google, Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/io.h>
+
+
+#define ASPEED_I2C_IC_NUM_BUS 14
+
+struct aspeed_i2c_ic {
+	void __iomem		*base;
+	int			parent_irq;
+	struct irq_domain	*irq_domain;
+};
+
+/*
+ * The aspeed chip provides a single hardware interrupt for all of the I2C
+ * busses, so we use a dummy interrupt chip to translate this single interrupt
+ * into multiple interrupts, each associated with a single I2C bus.
+ */
+static void aspeed_i2c_ic_irq_handler(struct irq_desc *desc)
+{
+	struct aspeed_i2c_ic *i2c_ic = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long bit, status;
+	unsigned int bus_irq;
+
+	chained_irq_enter(chip, desc);
+	status = readl(i2c_ic->base);
+	for_each_set_bit(bit, &status, ASPEED_I2C_IC_NUM_BUS) {
+		bus_irq = irq_find_mapping(i2c_ic->irq_domain, bit);
+		generic_handle_irq(bus_irq);
+	}
+	chained_irq_exit(chip, desc);
+}
+
+/*
+ * Set simple handler and mark IRQ as valid. Nothing interesting to do here
+ * since we are using a dummy interrupt chip.
+ */
+static int aspeed_i2c_ic_map_irq_domain(struct irq_domain *domain,
+					unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+
+	return 0;
+}
+
+static const struct irq_domain_ops aspeed_i2c_ic_irq_domain_ops = {
+	.map = aspeed_i2c_ic_map_irq_domain,
+};
+
+static int __init aspeed_i2c_ic_of_init(struct device_node *node,
+					struct device_node *parent)
+{
+	struct aspeed_i2c_ic *i2c_ic;
+	int ret = 0;
+
+	i2c_ic = kzalloc(sizeof(*i2c_ic), GFP_KERNEL);
+	if (!i2c_ic)
+		return -ENOMEM;
+
+	i2c_ic->base = of_iomap(node, 0);
+	if (IS_ERR(i2c_ic->base)) {
+		ret = PTR_ERR(i2c_ic->base);
+		goto err_free_ic;
+	}
+
+	i2c_ic->parent_irq = irq_of_parse_and_map(node, 0);
+	if (i2c_ic->parent_irq < 0) {
+		ret = i2c_ic->parent_irq;
+		goto err_iounmap;
+	}
+
+	i2c_ic->irq_domain = irq_domain_add_linear(node, ASPEED_I2C_IC_NUM_BUS,
+						   &aspeed_i2c_ic_irq_domain_ops,
+						   NULL);
+	if (!i2c_ic->irq_domain) {
+		ret = -ENOMEM;
+		goto err_iounmap;
+	}
+
+	i2c_ic->irq_domain->name = "aspeed-i2c-domain";
+
+	irq_set_chained_handler_and_data(i2c_ic->parent_irq,
+					 aspeed_i2c_ic_irq_handler, i2c_ic);
+
+	pr_info("i2c controller registered, irq %d\n", i2c_ic->parent_irq);
+
+	return 0;
+
+err_iounmap:
+	iounmap(i2c_ic->base);
+err_free_ic:
+	kfree(i2c_ic);
+	return ret;
+}
+
+IRQCHIP_DECLARE(ast2400_i2c_ic, "aspeed,ast2400-i2c-ic", aspeed_i2c_ic_of_init);
+IRQCHIP_DECLARE(ast2500_i2c_ic, "aspeed,ast2500-i2c-ic", aspeed_i2c_ic_of_init);

+ 3 - 2
drivers/irqchip/irq-aspeed-vic.c

@@ -186,7 +186,7 @@ static int avic_map(struct irq_domain *d, unsigned int irq,
 	return 0;
 }
 
-static struct irq_domain_ops avic_dom_ops = {
+static const struct irq_domain_ops avic_dom_ops = {
 	.map = avic_map,
 	.xlate = irq_domain_xlate_onetwocell,
 };
@@ -227,4 +227,5 @@ static int __init avic_of_init(struct device_node *node,
 	return 0;
 }
 
-IRQCHIP_DECLARE(aspeed_new_vic, "aspeed,ast2400-vic", avic_of_init);
+IRQCHIP_DECLARE(ast2400_vic, "aspeed,ast2400-vic", avic_of_init);
+IRQCHIP_DECLARE(ast2500_vic, "aspeed,ast2500-vic", avic_of_init);

+ 1 - 1
drivers/irqchip/irq-gic-v2m.c

@@ -280,7 +280,7 @@ static int gicv2m_allocate_domains(struct irq_domain *parent)
 		return -ENOMEM;
 	}
 
-	inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+	irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
 	inner_domain->parent = parent;
 	pci_domain = pci_msi_create_irq_domain(v2m->fwnode,
 					       &gicv2m_msi_domain_info,

+ 17 - 18
drivers/irqchip/irq-gic-v3-its-pci-msi.c

@@ -41,27 +41,22 @@ static struct irq_chip its_msi_irq_chip = {
 	.irq_write_msi_msg	= pci_msi_domain_write_msg,
 };
 
-struct its_pci_alias {
-	struct pci_dev	*pdev;
-	u32		count;
-};
-
-static int its_pci_msi_vec_count(struct pci_dev *pdev)
+static int its_pci_msi_vec_count(struct pci_dev *pdev, void *data)
 {
-	int msi, msix;
+	int msi, msix, *count = data;
 
 	msi = max(pci_msi_vec_count(pdev), 0);
 	msix = max(pci_msix_vec_count(pdev), 0);
+	*count += max(msi, msix);
 
-	return max(msi, msix);
+	return 0;
 }
 
 static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
 {
-	struct its_pci_alias *dev_alias = data;
+	struct pci_dev **alias_dev = data;
 
-	if (pdev != dev_alias->pdev)
-		dev_alias->count += its_pci_msi_vec_count(pdev);
+	*alias_dev = pdev;
 
 	return 0;
 }
@@ -69,9 +64,9 @@ static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
 static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
 			       int nvec, msi_alloc_info_t *info)
 {
-	struct pci_dev *pdev;
-	struct its_pci_alias dev_alias;
+	struct pci_dev *pdev, *alias_dev;
 	struct msi_domain_info *msi_info;
+	int alias_count = 0;
 
 	if (!dev_is_pci(dev))
 		return -EINVAL;
@@ -79,16 +74,20 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev,
 	msi_info = msi_get_domain_info(domain->parent);
 
 	pdev = to_pci_dev(dev);
-	dev_alias.pdev = pdev;
-	dev_alias.count = nvec;
-
-	pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
+	/*
+	 * If pdev is downstream of any aliasing bridges, take an upper
+	 * bound of how many other vectors could map to the same DevID.
+	 */
+	pci_for_each_dma_alias(pdev, its_get_pci_alias, &alias_dev);
+	if (alias_dev != pdev && alias_dev->subordinate)
+		pci_walk_bus(alias_dev->subordinate, its_pci_msi_vec_count,
+			     &alias_count);
 
 	/* ITS specific DeviceID, as the core ITS ignores dev. */
 	info->scratchpad[0].ul = pci_msi_domain_get_msi_rid(domain, pdev);
 
 	return msi_info->ops->msi_prepare(domain->parent,
-					  dev, dev_alias.count, info);
+					  dev, max(nvec, alias_count), info);
 }
 
 static struct msi_domain_ops its_pci_msi_ops = {

+ 1 - 1
drivers/irqchip/irq-gic-v3-its-platform-msi.c

@@ -86,7 +86,7 @@ static struct msi_domain_info its_pmsi_domain_info = {
 	.chip	= &its_pmsi_irq_chip,
 };
 
-static struct of_device_id its_device_id[] = {
+static const struct of_device_id its_device_id[] = {
 	{	.compatible	= "arm,gic-v3-its",	},
 	{},
 };

+ 93 - 20
drivers/irqchip/irq-gic-v3-its.c

@@ -644,9 +644,12 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	if (cpu >= nr_cpu_ids)
 		return -EINVAL;
 
-	target_col = &its_dev->its->collections[cpu];
-	its_send_movi(its_dev, target_col, id);
-	its_dev->event_map.col_map[id] = cpu;
+	/* don't set the affinity when the target cpu is same as current one */
+	if (cpu != its_dev->event_map.col_map[id]) {
+		target_col = &its_dev->its->collections[cpu];
+		its_send_movi(its_dev, target_col, id);
+		its_dev->event_map.col_map[id] = cpu;
+	}
 
 	return IRQ_SET_MASK_OK_DONE;
 }
@@ -688,9 +691,11 @@ static struct irq_chip its_irq_chip = {
  */
 #define IRQS_PER_CHUNK_SHIFT	5
 #define IRQS_PER_CHUNK		(1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS	16 /* 64K LPIs */
 
 static unsigned long *lpi_bitmap;
 static u32 lpi_chunks;
+static u32 lpi_id_bits;
 static DEFINE_SPINLOCK(lpi_lock);
 
 static int its_lpi_to_chunk(int lpi)
@@ -786,17 +791,13 @@ static void its_lpi_free(struct event_lpi_map *map)
 }
 
 /*
- * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
+ * We allocate memory for PROPBASE to cover 2 ^ lpi_id_bits LPIs to
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZ		SZ_64K
-#define LPI_PENDBASE_SZ		(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS		ilog2(LPI_PROPBASE_SZ + SZ_8K)
+#define LPI_NRBITS		lpi_id_bits
+#define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
+#define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
 
 #define LPI_PROP_DEFAULT_PRIO	0xa0
 
@@ -804,6 +805,7 @@ static int __init its_alloc_lpi_tables(void)
 {
 	phys_addr_t paddr;
 
+	lpi_id_bits = min_t(u32, gic_rdists->id_bits, ITS_MAX_LPI_NRBITS);
 	gic_rdists->prop_page = alloc_pages(GFP_NOWAIT,
 					   get_order(LPI_PROPBASE_SZ));
 	if (!gic_rdists->prop_page) {
@@ -822,7 +824,7 @@ static int __init its_alloc_lpi_tables(void)
 	/* Make sure the GIC will observe the written configuration */
 	gic_flush_dcache_to_poc(page_address(gic_rdists->prop_page), LPI_PROPBASE_SZ);
 
-	return 0;
+	return its_lpi_init(lpi_id_bits);
 }
 
 static const char *its_base_type_string[] = {
@@ -1097,7 +1099,7 @@ static void its_cpu_init_lpis(void)
 		 * hence the 'max(LPI_PENDBASE_SZ, SZ_64K)' below.
 		 */
 		pend_page = alloc_pages(GFP_NOWAIT | __GFP_ZERO,
-					get_order(max(LPI_PENDBASE_SZ, SZ_64K)));
+					get_order(max_t(u32, LPI_PENDBASE_SZ, SZ_64K)));
 		if (!pend_page) {
 			pr_err("Failed to allocate PENDBASE for CPU%d\n",
 			       smp_processor_id());
@@ -1661,7 +1663,7 @@ static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
 	}
 
 	inner_domain->parent = its_parent;
-	inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+	irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS);
 	inner_domain->flags |= IRQ_DOMAIN_FLAG_MSI_REMAP;
 	info->ops = &its_msi_domain_ops;
 	info->data = its;
@@ -1801,7 +1803,7 @@ int its_cpu_init(void)
 	return 0;
 }
 
-static struct of_device_id its_device_id[] = {
+static const struct of_device_id its_device_id[] = {
 	{	.compatible	= "arm,gic-v3-its",	},
 	{},
 };
@@ -1833,6 +1835,78 @@ static int __init its_of_probe(struct device_node *node)
 
 #define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K)
 
+#if defined(CONFIG_ACPI_NUMA) && (ACPI_CA_VERSION >= 0x20170531)
+struct its_srat_map {
+	/* numa node id */
+	u32	numa_node;
+	/* GIC ITS ID */
+	u32	its_id;
+};
+
+static struct its_srat_map its_srat_maps[MAX_NUMNODES] __initdata;
+static int its_in_srat __initdata;
+
+static int __init acpi_get_its_numa_node(u32 its_id)
+{
+	int i;
+
+	for (i = 0; i < its_in_srat; i++) {
+		if (its_id == its_srat_maps[i].its_id)
+			return its_srat_maps[i].numa_node;
+	}
+	return NUMA_NO_NODE;
+}
+
+static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header,
+			 const unsigned long end)
+{
+	int node;
+	struct acpi_srat_gic_its_affinity *its_affinity;
+
+	its_affinity = (struct acpi_srat_gic_its_affinity *)header;
+	if (!its_affinity)
+		return -EINVAL;
+
+	if (its_affinity->header.length < sizeof(*its_affinity)) {
+		pr_err("SRAT: Invalid header length %d in ITS affinity\n",
+			its_affinity->header.length);
+		return -EINVAL;
+	}
+
+	if (its_in_srat >= MAX_NUMNODES) {
+		pr_err("SRAT: ITS affinity exceeding max count[%d]\n",
+				MAX_NUMNODES);
+		return -EINVAL;
+	}
+
+	node = acpi_map_pxm_to_node(its_affinity->proximity_domain);
+
+	if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) {
+		pr_err("SRAT: Invalid NUMA node %d in ITS affinity\n", node);
+		return 0;
+	}
+
+	its_srat_maps[its_in_srat].numa_node = node;
+	its_srat_maps[its_in_srat].its_id = its_affinity->its_id;
+	its_in_srat++;
+	pr_info("SRAT: PXM %d -> ITS %d -> Node %d\n",
+		its_affinity->proximity_domain, its_affinity->its_id, node);
+
+	return 0;
+}
+
+static void __init acpi_table_parse_srat_its(void)
+{
+	acpi_table_parse_entries(ACPI_SIG_SRAT,
+			sizeof(struct acpi_table_srat),
+			ACPI_SRAT_TYPE_GIC_ITS_AFFINITY,
+			gic_acpi_parse_srat_its, 0);
+}
+#else
+static void __init acpi_table_parse_srat_its(void)	{ }
+static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; }
+#endif
+
 static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
 					  const unsigned long end)
 {
@@ -1861,7 +1935,8 @@ static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
 		goto dom_err;
 	}
 
-	err = its_probe_one(&res, dom_handle, NUMA_NO_NODE);
+	err = its_probe_one(&res, dom_handle,
+			acpi_get_its_numa_node(its_entry->translation_id));
 	if (!err)
 		return 0;
 
@@ -1873,6 +1948,7 @@ dom_err:
 
 static void __init its_acpi_probe(void)
 {
+	acpi_table_parse_srat_its();
 	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
 			      gic_acpi_parse_madt_its, 0);
 }
@@ -1898,8 +1974,5 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	}
 
 	gic_rdists = rdists;
-	its_alloc_lpi_tables();
-	its_lpi_init(rdists->id_bits);
-
-	return 0;
+	return its_alloc_lpi_tables();
 }

+ 3 - 0
drivers/irqchip/irq-gic-v3.c

@@ -645,6 +645,9 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 	int enabled;
 	u64 val;
 
+	if (cpu >= nr_cpu_ids)
+		return -EINVAL;
+
 	if (gic_irq_in_rdist(d))
 		return -EINVAL;
 

+ 1 - 1
drivers/irqchip/irq-i8259.c

@@ -307,7 +307,7 @@ static int i8259A_irq_domain_map(struct irq_domain *d, unsigned int virq,
 	return 0;
 }
 
-static struct irq_domain_ops i8259A_ops = {
+static const struct irq_domain_ops i8259A_ops = {
 	.map = i8259A_irq_domain_map,
 	.xlate = irq_domain_xlate_onecell,
 };

+ 1 - 1
drivers/irqchip/irq-imx-gpcv2.c

@@ -200,7 +200,7 @@ static int imx_gpcv2_domain_alloc(struct irq_domain *domain,
 					    &parent_fwspec);
 }
 
-static struct irq_domain_ops gpcv2_irqchip_data_domain_ops = {
+static const struct irq_domain_ops gpcv2_irqchip_data_domain_ops = {
 	.translate	= imx_gpcv2_domain_translate,
 	.alloc		= imx_gpcv2_domain_alloc,
 	.free		= irq_domain_free_irqs_common,

+ 1 - 1
drivers/irqchip/irq-mbigen.c

@@ -228,7 +228,7 @@ static int mbigen_irq_domain_alloc(struct irq_domain *domain,
 	return 0;
 }
 
-static struct irq_domain_ops mbigen_domain_ops = {
+static const struct irq_domain_ops mbigen_domain_ops = {
 	.translate	= mbigen_domain_translate,
 	.alloc		= mbigen_irq_domain_alloc,
 	.free		= irq_domain_free_irqs_common,

+ 1 - 1
drivers/irqchip/irq-mips-cpu.c

@@ -240,7 +240,7 @@ static void mips_cpu_register_ipi_domain(struct device_node *of_node)
 					      ipi_domain_state);
 	if (!ipi_domain)
 		panic("Failed to add MIPS CPU IPI domain");
-	ipi_domain->bus_token = DOMAIN_BUS_IPI;
+	irq_domain_update_bus_token(ipi_domain, DOMAIN_BUS_IPI);
 }
 
 #else /* !CONFIG_GENERIC_IRQ_IPI */

+ 2 - 2
drivers/irqchip/irq-mips-gic.c

@@ -874,7 +874,7 @@ int gic_ipi_domain_match(struct irq_domain *d, struct device_node *node,
 	}
 }
 
-static struct irq_domain_ops gic_ipi_domain_ops = {
+static const struct irq_domain_ops gic_ipi_domain_ops = {
 	.xlate = gic_ipi_domain_xlate,
 	.alloc = gic_ipi_domain_alloc,
 	.free = gic_ipi_domain_free,
@@ -960,7 +960,7 @@ static void __init __gic_init(unsigned long gic_base_addr,
 		panic("Failed to add GIC IPI domain");
 
 	gic_ipi_domain->name = "mips-gic-ipi";
-	gic_ipi_domain->bus_token = DOMAIN_BUS_IPI;
+	irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI);
 
 	if (node &&
 	    !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) {

+ 279 - 0
drivers/irqchip/irq-mvebu-gicp.c

@@ -0,0 +1,279 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+#include "irq-mvebu-gicp.h"
+
+#define GICP_SETSPI_NSR_OFFSET	0x0
+#define GICP_CLRSPI_NSR_OFFSET	0x8
+
+struct mvebu_gicp_spi_range {
+	unsigned int start;
+	unsigned int count;
+};
+
+struct mvebu_gicp {
+	struct mvebu_gicp_spi_range *spi_ranges;
+	unsigned int spi_ranges_cnt;
+	unsigned int spi_cnt;
+	unsigned long *spi_bitmap;
+	spinlock_t spi_lock;
+	struct resource *res;
+	struct device *dev;
+};
+
+static int gicp_idx_to_spi(struct mvebu_gicp *gicp, int idx)
+{
+	int i;
+
+	for (i = 0; i < gicp->spi_ranges_cnt; i++) {
+		struct mvebu_gicp_spi_range *r = &gicp->spi_ranges[i];
+
+		if (idx < r->count)
+			return r->start + idx;
+
+		idx -= r->count;
+	}
+
+	return -EINVAL;
+}
+
+int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
+			     phys_addr_t *clrspi)
+{
+	struct platform_device *pdev;
+	struct mvebu_gicp *gicp;
+
+	pdev = of_find_device_by_node(dn);
+	if (!pdev)
+		return -ENODEV;
+
+	gicp = platform_get_drvdata(pdev);
+	if (!gicp)
+		return -ENODEV;
+
+	*setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
+	*clrspi = gicp->res->start + GICP_CLRSPI_NSR_OFFSET;
+
+	return 0;
+}
+
+static void gicp_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+	struct mvebu_gicp *gicp = data->chip_data;
+	phys_addr_t setspi = gicp->res->start + GICP_SETSPI_NSR_OFFSET;
+
+	msg->data = data->hwirq;
+	msg->address_lo = lower_32_bits(setspi);
+	msg->address_hi = upper_32_bits(setspi);
+}
+
+static struct irq_chip gicp_irq_chip = {
+	.name			= "GICP",
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_eoi		= irq_chip_eoi_parent,
+	.irq_set_affinity	= irq_chip_set_affinity_parent,
+	.irq_set_type		= irq_chip_set_type_parent,
+	.irq_compose_msi_msg	= gicp_compose_msi_msg,
+};
+
+static int gicp_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				 unsigned int nr_irqs, void *args)
+{
+	struct mvebu_gicp *gicp = domain->host_data;
+	struct irq_fwspec fwspec;
+	unsigned int hwirq;
+	int ret;
+
+	spin_lock(&gicp->spi_lock);
+	hwirq = find_first_zero_bit(gicp->spi_bitmap, gicp->spi_cnt);
+	if (hwirq == gicp->spi_cnt) {
+		spin_unlock(&gicp->spi_lock);
+		return -ENOSPC;
+	}
+	__set_bit(hwirq, gicp->spi_bitmap);
+	spin_unlock(&gicp->spi_lock);
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 3;
+	fwspec.param[0] = GIC_SPI;
+	fwspec.param[1] = gicp_idx_to_spi(gicp, hwirq) - 32;
+	/*
+	 * Assume edge rising for now, it will be properly set when
+	 * ->set_type() is called
+	 */
+	fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret) {
+		dev_err(gicp->dev, "Cannot allocate parent IRQ\n");
+		goto free_hwirq;
+	}
+
+	ret = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					    &gicp_irq_chip, gicp);
+	if (ret)
+		goto free_irqs_parent;
+
+	return 0;
+
+free_irqs_parent:
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+free_hwirq:
+	spin_lock(&gicp->spi_lock);
+	__clear_bit(hwirq, gicp->spi_bitmap);
+	spin_unlock(&gicp->spi_lock);
+	return ret;
+}
+
+static void gicp_irq_domain_free(struct irq_domain *domain,
+				 unsigned int virq, unsigned int nr_irqs)
+{
+	struct mvebu_gicp *gicp = domain->host_data;
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	if (d->hwirq >= gicp->spi_cnt) {
+		dev_err(gicp->dev, "Invalid hwirq %lu\n", d->hwirq);
+		return;
+	}
+
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+
+	spin_lock(&gicp->spi_lock);
+	__clear_bit(d->hwirq, gicp->spi_bitmap);
+	spin_unlock(&gicp->spi_lock);
+}
+
+static const struct irq_domain_ops gicp_domain_ops = {
+	.alloc	= gicp_irq_domain_alloc,
+	.free	= gicp_irq_domain_free,
+};
+
+static struct irq_chip gicp_msi_irq_chip = {
+	.name		= "GICP",
+	.irq_set_type	= irq_chip_set_type_parent,
+};
+
+static struct msi_domain_ops gicp_msi_ops = {
+};
+
+static struct msi_domain_info gicp_msi_domain_info = {
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+	.ops	= &gicp_msi_ops,
+	.chip	= &gicp_msi_irq_chip,
+};
+
+static int mvebu_gicp_probe(struct platform_device *pdev)
+{
+	struct mvebu_gicp *gicp;
+	struct irq_domain *inner_domain, *plat_domain, *parent_domain;
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *irq_parent_dn;
+	int ret, i;
+
+	gicp = devm_kzalloc(&pdev->dev, sizeof(*gicp), GFP_KERNEL);
+	if (!gicp)
+		return -ENOMEM;
+
+	gicp->dev = &pdev->dev;
+
+	gicp->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!gicp->res)
+		return -ENODEV;
+
+	ret = of_property_count_u32_elems(node, "marvell,spi-ranges");
+	if (ret < 0)
+		return ret;
+
+	gicp->spi_ranges_cnt = ret / 2;
+
+	gicp->spi_ranges =
+		devm_kzalloc(&pdev->dev,
+			     gicp->spi_ranges_cnt *
+			     sizeof(struct mvebu_gicp_spi_range),
+			     GFP_KERNEL);
+	if (!gicp->spi_ranges)
+		return -ENOMEM;
+
+	for (i = 0; i < gicp->spi_ranges_cnt; i++) {
+		of_property_read_u32_index(node, "marvell,spi-ranges",
+					   i * 2,
+					   &gicp->spi_ranges[i].start);
+
+		of_property_read_u32_index(node, "marvell,spi-ranges",
+					   i * 2 + 1,
+					   &gicp->spi_ranges[i].count);
+
+		gicp->spi_cnt += gicp->spi_ranges[i].count;
+	}
+
+	gicp->spi_bitmap = devm_kzalloc(&pdev->dev,
+				BITS_TO_LONGS(gicp->spi_cnt) * sizeof(long),
+				GFP_KERNEL);
+	if (!gicp->spi_bitmap)
+		return -ENOMEM;
+
+	irq_parent_dn = of_irq_find_parent(node);
+	if (!irq_parent_dn) {
+		dev_err(&pdev->dev, "failed to find parent IRQ node\n");
+		return -ENODEV;
+	}
+
+	parent_domain = irq_find_host(irq_parent_dn);
+	if (!parent_domain) {
+		dev_err(&pdev->dev, "failed to find parent IRQ domain\n");
+		return -ENODEV;
+	}
+
+	inner_domain = irq_domain_create_hierarchy(parent_domain, 0,
+						   gicp->spi_cnt,
+						   of_node_to_fwnode(node),
+						   &gicp_domain_ops, gicp);
+	if (!inner_domain)
+		return -ENOMEM;
+
+
+	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
+						     &gicp_msi_domain_info,
+						     inner_domain);
+	if (!plat_domain) {
+		irq_domain_remove(inner_domain);
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, gicp);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_gicp_of_match[] = {
+	{ .compatible = "marvell,ap806-gicp", },
+	{},
+};
+
+static struct platform_driver mvebu_gicp_driver = {
+	.probe  = mvebu_gicp_probe,
+	.driver = {
+		.name = "mvebu-gicp",
+		.of_match_table = mvebu_gicp_of_match,
+	},
+};
+builtin_platform_driver(mvebu_gicp_driver);

+ 11 - 0
drivers/irqchip/irq-mvebu-gicp.h

@@ -0,0 +1,11 @@
+#ifndef __MVEBU_GICP_H__
+#define __MVEBU_GICP_H__
+
+#include <linux/types.h>
+
+struct device_node;
+
+int mvebu_gicp_get_doorbells(struct device_node *dn, phys_addr_t *setspi,
+			     phys_addr_t *clrspi);
+
+#endif /* __MVEBU_GICP_H__ */

+ 289 - 0
drivers/irqchip/irq-mvebu-icu.c

@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2017 Marvell
+ *
+ * Hanna Hawa <hannah@marvell.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/interrupt-controller/mvebu-icu.h>
+
+#include "irq-mvebu-gicp.h"
+
+/* ICU registers */
+#define ICU_SETSPI_NSR_AL	0x10
+#define ICU_SETSPI_NSR_AH	0x14
+#define ICU_CLRSPI_NSR_AL	0x18
+#define ICU_CLRSPI_NSR_AH	0x1c
+#define ICU_INT_CFG(x)          (0x100 + 4 * (x))
+#define   ICU_INT_ENABLE	BIT(24)
+#define   ICU_IS_EDGE		BIT(28)
+#define   ICU_GROUP_SHIFT	29
+
+/* ICU definitions */
+#define ICU_MAX_IRQS		207
+#define ICU_SATA0_ICU_ID	109
+#define ICU_SATA1_ICU_ID	107
+
+struct mvebu_icu {
+	struct irq_chip irq_chip;
+	void __iomem *base;
+	struct irq_domain *domain;
+	struct device *dev;
+};
+
+struct mvebu_icu_irq_data {
+	struct mvebu_icu *icu;
+	unsigned int icu_group;
+	unsigned int type;
+};
+
+static void mvebu_icu_write_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	struct irq_data *d = irq_get_irq_data(desc->irq);
+	struct mvebu_icu_irq_data *icu_irqd = d->chip_data;
+	struct mvebu_icu *icu = icu_irqd->icu;
+	unsigned int icu_int;
+
+	if (msg->address_lo || msg->address_hi) {
+		/* Configure the ICU with irq number & type */
+		icu_int = msg->data | ICU_INT_ENABLE;
+		if (icu_irqd->type & IRQ_TYPE_EDGE_RISING)
+			icu_int |= ICU_IS_EDGE;
+		icu_int |= icu_irqd->icu_group << ICU_GROUP_SHIFT;
+	} else {
+		/* De-configure the ICU */
+		icu_int = 0;
+	}
+
+	writel_relaxed(icu_int, icu->base + ICU_INT_CFG(d->hwirq));
+
+	/*
+	 * The SATA unit has 2 ports, and a dedicated ICU entry per
+	 * port. The ahci sata driver supports only one irq interrupt
+	 * per SATA unit. To solve this conflict, we configure the 2
+	 * SATA wired interrupts in the south bridge into 1 GIC
+	 * interrupt in the north bridge. Even if only a single port
+	 * is enabled, if sata node is enabled, both interrupts are
+	 * configured (regardless of which port is actually in use).
+	 */
+	if (d->hwirq == ICU_SATA0_ICU_ID || d->hwirq == ICU_SATA1_ICU_ID) {
+		writel_relaxed(icu_int,
+			       icu->base + ICU_INT_CFG(ICU_SATA0_ICU_ID));
+		writel_relaxed(icu_int,
+			       icu->base + ICU_INT_CFG(ICU_SATA1_ICU_ID));
+	}
+}
+
+static int
+mvebu_icu_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec,
+			       unsigned long *hwirq, unsigned int *type)
+{
+	struct mvebu_icu *icu = d->host_data;
+	unsigned int icu_group;
+
+	/* Check the count of the parameters in dt */
+	if (WARN_ON(fwspec->param_count < 3)) {
+		dev_err(icu->dev, "wrong ICU parameter count %d\n",
+			fwspec->param_count);
+		return -EINVAL;
+	}
+
+	/* Only ICU group type is handled */
+	icu_group = fwspec->param[0];
+	if (icu_group != ICU_GRP_NSR && icu_group != ICU_GRP_SR &&
+	    icu_group != ICU_GRP_SEI && icu_group != ICU_GRP_REI) {
+		dev_err(icu->dev, "wrong ICU group type %x\n", icu_group);
+		return -EINVAL;
+	}
+
+	*hwirq = fwspec->param[1];
+	if (*hwirq >= ICU_MAX_IRQS) {
+		dev_err(icu->dev, "invalid interrupt number %ld\n", *hwirq);
+		return -EINVAL;
+	}
+
+	/* Mask the type to prevent wrong DT configuration */
+	*type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK;
+
+	return 0;
+}
+
+static int
+mvebu_icu_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
+			   unsigned int nr_irqs, void *args)
+{
+	int err;
+	unsigned long hwirq;
+	struct irq_fwspec *fwspec = args;
+	struct mvebu_icu *icu = platform_msi_get_host_data(domain);
+	struct mvebu_icu_irq_data *icu_irqd;
+
+	icu_irqd = kmalloc(sizeof(*icu_irqd), GFP_KERNEL);
+	if (!icu_irqd)
+		return -ENOMEM;
+
+	err = mvebu_icu_irq_domain_translate(domain, fwspec, &hwirq,
+					     &icu_irqd->type);
+	if (err) {
+		dev_err(icu->dev, "failed to translate ICU parameters\n");
+		goto free_irqd;
+	}
+
+	icu_irqd->icu_group = fwspec->param[0];
+	icu_irqd->icu = icu;
+
+	err = platform_msi_domain_alloc(domain, virq, nr_irqs);
+	if (err) {
+		dev_err(icu->dev, "failed to allocate ICU interrupt in parent domain\n");
+		goto free_irqd;
+	}
+
+	/* Make sure there is no interrupt left pending by the firmware */
+	err = irq_set_irqchip_state(virq, IRQCHIP_STATE_PENDING, false);
+	if (err)
+		goto free_msi;
+
+	err = irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
+					    &icu->irq_chip, icu_irqd);
+	if (err) {
+		dev_err(icu->dev, "failed to set the data to IRQ domain\n");
+		goto free_msi;
+	}
+
+	return 0;
+
+free_msi:
+	platform_msi_domain_free(domain, virq, nr_irqs);
+free_irqd:
+	kfree(icu_irqd);
+	return err;
+}
+
+static void
+mvebu_icu_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+			  unsigned int nr_irqs)
+{
+	struct irq_data *d = irq_get_irq_data(virq);
+	struct mvebu_icu_irq_data *icu_irqd = d->chip_data;
+
+	kfree(icu_irqd);
+
+	platform_msi_domain_free(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops mvebu_icu_domain_ops = {
+	.translate = mvebu_icu_irq_domain_translate,
+	.alloc     = mvebu_icu_irq_domain_alloc,
+	.free      = mvebu_icu_irq_domain_free,
+};
+
+static int mvebu_icu_probe(struct platform_device *pdev)
+{
+	struct mvebu_icu *icu;
+	struct device_node *node = pdev->dev.of_node;
+	struct device_node *gicp_dn;
+	struct resource *res;
+	phys_addr_t setspi, clrspi;
+	u32 i, icu_int;
+	int ret;
+
+	icu = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_icu),
+			   GFP_KERNEL);
+	if (!icu)
+		return -ENOMEM;
+
+	icu->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	icu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(icu->base)) {
+		dev_err(&pdev->dev, "Failed to map icu base address.\n");
+		return PTR_ERR(icu->base);
+	}
+
+	icu->irq_chip.name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
+					    "ICU.%x",
+					    (unsigned int)res->start);
+	if (!icu->irq_chip.name)
+		return -ENOMEM;
+
+	icu->irq_chip.irq_mask = irq_chip_mask_parent;
+	icu->irq_chip.irq_unmask = irq_chip_unmask_parent;
+	icu->irq_chip.irq_eoi = irq_chip_eoi_parent;
+	icu->irq_chip.irq_set_type = irq_chip_set_type_parent;
+#ifdef CONFIG_SMP
+	icu->irq_chip.irq_set_affinity = irq_chip_set_affinity_parent;
+#endif
+
+	/*
+	 * We're probed after MSI domains have been resolved, so force
+	 * resolution here.
+	 */
+	pdev->dev.msi_domain = of_msi_get_domain(&pdev->dev, node,
+						 DOMAIN_BUS_PLATFORM_MSI);
+	if (!pdev->dev.msi_domain)
+		return -EPROBE_DEFER;
+
+	gicp_dn = irq_domain_get_of_node(pdev->dev.msi_domain);
+	if (!gicp_dn)
+		return -ENODEV;
+
+	ret = mvebu_gicp_get_doorbells(gicp_dn, &setspi, &clrspi);
+	if (ret)
+		return ret;
+
+	/* Set Clear/Set ICU SPI message address in AP */
+	writel_relaxed(upper_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AH);
+	writel_relaxed(lower_32_bits(setspi), icu->base + ICU_SETSPI_NSR_AL);
+	writel_relaxed(upper_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AH);
+	writel_relaxed(lower_32_bits(clrspi), icu->base + ICU_CLRSPI_NSR_AL);
+
+	/*
+	 * Clean all ICU interrupts with type SPI_NSR, required to
+	 * avoid unpredictable SPI assignments done by firmware.
+	 */
+	for (i = 0 ; i < ICU_MAX_IRQS ; i++) {
+		icu_int = readl(icu->base + ICU_INT_CFG(i));
+		if ((icu_int >> ICU_GROUP_SHIFT) == ICU_GRP_NSR)
+			writel_relaxed(0x0, icu->base + ICU_INT_CFG(i));
+	}
+
+	icu->domain =
+		platform_msi_create_device_domain(&pdev->dev, ICU_MAX_IRQS,
+						  mvebu_icu_write_msg,
+						  &mvebu_icu_domain_ops, icu);
+	if (!icu->domain) {
+		dev_err(&pdev->dev, "Failed to create ICU domain\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_icu_of_match[] = {
+	{ .compatible = "marvell,cp110-icu", },
+	{},
+};
+
+static struct platform_driver mvebu_icu_driver = {
+	.probe  = mvebu_icu_probe,
+	.driver = {
+		.name = "mvebu-icu",
+		.of_match_table = mvebu_icu_of_match,
+	},
+};
+builtin_platform_driver(mvebu_icu_driver);

+ 1 - 1
drivers/irqchip/irq-or1k-pic.c

@@ -70,7 +70,7 @@ static struct or1k_pic_dev or1k_pic_level = {
 		.name = "or1k-PIC-level",
 		.irq_unmask = or1k_pic_unmask,
 		.irq_mask = or1k_pic_mask,
-		.irq_mask_ack = or1k_pic_mask,
+		.irq_mask_ack = or1k_pic_mask_ack,
 	},
 	.handle = handle_level_irq,
 	.flags = IRQ_LEVEL | IRQ_NOPROBE,

+ 1 - 1
drivers/irqchip/irq-renesas-h8300h.c

@@ -67,7 +67,7 @@ static int irq_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops irq_ops = {
+static const struct irq_domain_ops irq_ops = {
        .map    = irq_map,
        .xlate  = irq_domain_xlate_onecell,
 };

+ 1 - 1
drivers/irqchip/irq-renesas-h8s.c

@@ -73,7 +73,7 @@ static __init int irq_map(struct irq_domain *h, unsigned int virq,
        return 0;
 }
 
-static struct irq_domain_ops irq_ops = {
+static const struct irq_domain_ops irq_ops = {
        .map    = irq_map,
        .xlate  = irq_domain_xlate_onecell,
 };

+ 54 - 14
drivers/irqchip/irq-sunxi-nmi.c

@@ -25,6 +25,29 @@
 
 #define SUNXI_NMI_SRC_TYPE_MASK	0x00000003
 
+#define SUNXI_NMI_IRQ_BIT	BIT(0)
+
+#define SUN6I_R_INTC_CTRL	0x0c
+#define SUN6I_R_INTC_PENDING	0x10
+#define SUN6I_R_INTC_ENABLE	0x40
+
+/*
+ * For deprecated sun6i-a31-sc-nmi compatible.
+ * Registers are offset by 0x0c.
+ */
+#define SUN6I_R_INTC_NMI_OFFSET	0x0c
+#define SUN6I_NMI_CTRL		(SUN6I_R_INTC_CTRL - SUN6I_R_INTC_NMI_OFFSET)
+#define SUN6I_NMI_PENDING	(SUN6I_R_INTC_PENDING - SUN6I_R_INTC_NMI_OFFSET)
+#define SUN6I_NMI_ENABLE	(SUN6I_R_INTC_ENABLE - SUN6I_R_INTC_NMI_OFFSET)
+
+#define SUN7I_NMI_CTRL		0x00
+#define SUN7I_NMI_PENDING	0x04
+#define SUN7I_NMI_ENABLE	0x08
+
+#define SUN9I_NMI_CTRL		0x00
+#define SUN9I_NMI_ENABLE	0x04
+#define SUN9I_NMI_PENDING	0x08
+
 enum {
 	SUNXI_SRC_TYPE_LEVEL_LOW = 0,
 	SUNXI_SRC_TYPE_EDGE_FALLING,
@@ -38,22 +61,28 @@ struct sunxi_sc_nmi_reg_offs {
 	u32 enable;
 };
 
-static struct sunxi_sc_nmi_reg_offs sun7i_reg_offs = {
-	.ctrl	= 0x00,
-	.pend	= 0x04,
-	.enable	= 0x08,
+static const struct sunxi_sc_nmi_reg_offs sun6i_r_intc_reg_offs __initconst = {
+	.ctrl	= SUN6I_R_INTC_CTRL,
+	.pend	= SUN6I_R_INTC_PENDING,
+	.enable	= SUN6I_R_INTC_ENABLE,
 };
 
-static struct sunxi_sc_nmi_reg_offs sun6i_reg_offs = {
-	.ctrl	= 0x00,
-	.pend	= 0x04,
-	.enable	= 0x34,
+static const struct sunxi_sc_nmi_reg_offs sun6i_reg_offs __initconst = {
+	.ctrl	= SUN6I_NMI_CTRL,
+	.pend	= SUN6I_NMI_PENDING,
+	.enable	= SUN6I_NMI_ENABLE,
 };
 
-static struct sunxi_sc_nmi_reg_offs sun9i_reg_offs = {
-	.ctrl	= 0x00,
-	.pend	= 0x08,
-	.enable	= 0x04,
+static const struct sunxi_sc_nmi_reg_offs sun7i_reg_offs __initconst = {
+	.ctrl	= SUN7I_NMI_CTRL,
+	.pend	= SUN7I_NMI_PENDING,
+	.enable	= SUN7I_NMI_ENABLE,
+};
+
+static const struct sunxi_sc_nmi_reg_offs sun9i_reg_offs __initconst = {
+	.ctrl	= SUN9I_NMI_CTRL,
+	.pend	= SUN9I_NMI_PENDING,
+	.enable	= SUN9I_NMI_ENABLE,
 };
 
 static inline void sunxi_sc_nmi_write(struct irq_chip_generic *gc, u32 off,
@@ -128,7 +157,7 @@ static int sunxi_sc_nmi_set_type(struct irq_data *data, unsigned int flow_type)
 }
 
 static int __init sunxi_sc_nmi_irq_init(struct device_node *node,
-					struct sunxi_sc_nmi_reg_offs *reg_offs)
+					const struct sunxi_sc_nmi_reg_offs *reg_offs)
 {
 	struct irq_domain *domain;
 	struct irq_chip_generic *gc;
@@ -187,8 +216,11 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node,
 	gc->chip_types[1].regs.type		= reg_offs->ctrl;
 	gc->chip_types[1].handler		= handle_edge_irq;
 
+	/* Disable any active interrupts */
 	sunxi_sc_nmi_write(gc, reg_offs->enable, 0);
-	sunxi_sc_nmi_write(gc, reg_offs->pend, 0x1);
+
+	/* Clear any pending NMI interrupts */
+	sunxi_sc_nmi_write(gc, reg_offs->pend, SUNXI_NMI_IRQ_BIT);
 
 	irq_set_chained_handler_and_data(irq, sunxi_sc_nmi_handle_irq, domain);
 
@@ -200,6 +232,14 @@ fail_irqd_remove:
 	return ret;
 }
 
+static int __init sun6i_r_intc_irq_init(struct device_node *node,
+					struct device_node *parent)
+{
+	return sunxi_sc_nmi_irq_init(node, &sun6i_r_intc_reg_offs);
+}
+IRQCHIP_DECLARE(sun6i_r_intc, "allwinner,sun6i-a31-r-intc",
+		sun6i_r_intc_irq_init);
+
 static int __init sun6i_sc_nmi_irq_init(struct device_node *node,
 					struct device_node *parent)
 {

+ 1 - 6
drivers/irqchip/qcom-irq-combiner.c

@@ -288,9 +288,4 @@ static struct platform_driver qcom_irq_combiner_probe = {
 	},
 	.probe = combiner_probe,
 };
-
-static int __init register_qcom_irq_combiner(void)
-{
-	return platform_driver_register(&qcom_irq_combiner_probe);
-}
-device_initcall(register_qcom_irq_combiner);
+builtin_platform_driver(qcom_irq_combiner_probe);

+ 1 - 1
drivers/nvme/host/pci.c

@@ -1695,7 +1695,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	int result, nr_io_queues;
 	unsigned long size;
 
-	nr_io_queues = num_online_cpus();
+	nr_io_queues = num_present_cpus();
 	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
 	if (result < 0)
 		return result;

+ 7 - 1
drivers/pci/host/vmd.c

@@ -554,6 +554,7 @@ static int vmd_find_free_domain(void)
 static int vmd_enable_domain(struct vmd_dev *vmd)
 {
 	struct pci_sysdata *sd = &vmd->sysdata;
+	struct fwnode_handle *fn;
 	struct resource *res;
 	u32 upper_bits;
 	unsigned long flags;
@@ -617,8 +618,13 @@ static int vmd_enable_domain(struct vmd_dev *vmd)
 
 	sd->node = pcibus_to_node(vmd->dev->bus);
 
-	vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info,
+	fn = irq_domain_alloc_named_id_fwnode("VMD-MSI", vmd->sysdata.domain);
+	if (!fn)
+		return -ENODEV;
+
+	vmd->irq_domain = pci_msi_create_irq_domain(fn, &vmd_msi_domain_info,
 						    x86_vector_domain);
+	irq_domain_free_fwnode(fn);
 	if (!vmd->irq_domain)
 		return -ENODEV;
 

+ 1 - 1
drivers/pci/msi.c

@@ -1463,7 +1463,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode,
 	if (!domain)
 		return NULL;
 
-	domain->bus_token = DOMAIN_BUS_PCI_MSI;
+	irq_domain_update_bus_token(domain, DOMAIN_BUS_PCI_MSI);
 	return domain;
 }
 EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain);

+ 1 - 1
drivers/staging/fsl-mc/bus/fsl-mc-msi.c

@@ -170,7 +170,7 @@ struct irq_domain *fsl_mc_msi_create_irq_domain(struct fwnode_handle *fwnode,
 
 	domain = msi_create_irq_domain(fwnode, info, parent);
 	if (domain)
-		domain->bus_token = DOMAIN_BUS_FSL_MC_MSI;
+		irq_domain_update_bus_token(domain, DOMAIN_BUS_FSL_MC_MSI);
 
 	return domain;
 }

+ 5 - 1
drivers/xen/events/events_base.c

@@ -1343,8 +1343,12 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 			    bool force)
 {
 	unsigned tcpu = cpumask_first_and(dest, cpu_online_mask);
+	int ret = rebind_irq_to_cpu(data->irq, tcpu);
 
-	return rebind_irq_to_cpu(data->irq, tcpu);
+	if (!ret)
+		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
+
+	return ret;
 }
 
 static void enable_dynirq(struct irq_data *data)

+ 15 - 0
include/dt-bindings/interrupt-controller/mvebu-icu.h

@@ -0,0 +1,15 @@
+/*
+ * This header provides constants for the MVEBU ICU driver.
+ */
+
+#ifndef _DT_BINDINGS_INTERRUPT_CONTROLLER_MVEBU_ICU_H
+#define _DT_BINDINGS_INTERRUPT_CONTROLLER_MVEBU_ICU_H
+
+/* interrupt specifier cell 0 */
+
+#define ICU_GRP_NSR		0x0
+#define ICU_GRP_SR		0x1
+#define ICU_GRP_SEI		0x4
+#define ICU_GRP_REI		0x5
+
+#endif

+ 1 - 1
include/linux/cpuhotplug.h

@@ -58,7 +58,6 @@ enum cpuhp_state {
 	CPUHP_XEN_EVTCHN_PREPARE,
 	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
 	CPUHP_SH_SH3X_PREPARE,
-	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_NET_FLOW_PREPARE,
 	CPUHP_TOPOLOGY_PREPARE,
 	CPUHP_NET_IUCV_PREPARE,
@@ -124,6 +123,7 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE_IDLE,
 	CPUHP_AP_SMPBOOT_THREADS,
 	CPUHP_AP_X86_VDSO_VMA_ONLINE,
+	CPUHP_AP_IRQ_AFFINITY_ONLINE,
 	CPUHP_AP_PERF_ONLINE,
 	CPUHP_AP_PERF_X86_ONLINE,
 	CPUHP_AP_PERF_X86_UNCORE_ONLINE,

+ 6 - 0
include/linux/interrupt.h

@@ -703,6 +703,12 @@ static inline void init_irq_proc(void)
 }
 #endif
 
+#ifdef CONFIG_IRQ_TIMINGS
+void irq_timings_enable(void);
+void irq_timings_disable(void);
+u64 irq_timings_next_event(u64 now);
+#endif
+
 struct seq_file;
 int show_interrupts(struct seq_file *p, void *v);
 int arch_show_interrupts(struct seq_file *p, int prec);

+ 89 - 0
include/linux/irq.h

@@ -22,6 +22,7 @@
 #include <linux/topology.h>
 #include <linux/wait.h>
 #include <linux/io.h>
+#include <linux/slab.h>
 
 #include <asm/irq.h>
 #include <asm/ptrace.h>
@@ -136,6 +137,9 @@ struct irq_domain;
  * @affinity:		IRQ affinity on SMP. If this is an IPI
  *			related irq, then this is the mask of the
  *			CPUs to which an IPI can be sent.
+ * @effective_affinity:	The effective IRQ affinity on SMP as some irq
+ *			chips do not allow multi CPU destinations.
+ *			A subset of @affinity.
  * @msi_desc:		MSI descriptor
  * @ipi_offset:		Offset of first IPI target cpu in @affinity. Optional.
  */
@@ -147,6 +151,9 @@ struct irq_common_data {
 	void			*handler_data;
 	struct msi_desc		*msi_desc;
 	cpumask_var_t		affinity;
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	cpumask_var_t		effective_affinity;
+#endif
 #ifdef CONFIG_GENERIC_IRQ_IPI
 	unsigned int		ipi_offset;
 #endif
@@ -199,6 +206,10 @@ struct irq_data {
  * IRQD_WAKEUP_ARMED		- Wakeup mode armed
  * IRQD_FORWARDED_TO_VCPU	- The interrupt is forwarded to a VCPU
  * IRQD_AFFINITY_MANAGED	- Affinity is auto-managed by the kernel
+ * IRQD_IRQ_STARTED		- Startup state of the interrupt
+ * IRQD_MANAGED_SHUTDOWN	- Interrupt was shutdown due to empty affinity
+ *				  mask. Applies only to affinity managed irqs.
+ * IRQD_SINGLE_TARGET		- IRQ allows only a single affinity target
  */
 enum {
 	IRQD_TRIGGER_MASK		= 0xf,
@@ -216,6 +227,9 @@ enum {
 	IRQD_WAKEUP_ARMED		= (1 << 19),
 	IRQD_FORWARDED_TO_VCPU		= (1 << 20),
 	IRQD_AFFINITY_MANAGED		= (1 << 21),
+	IRQD_IRQ_STARTED		= (1 << 22),
+	IRQD_MANAGED_SHUTDOWN		= (1 << 23),
+	IRQD_SINGLE_TARGET		= (1 << 24),
 };
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
@@ -264,6 +278,20 @@ static inline bool irqd_is_level_type(struct irq_data *d)
 	return __irqd_to_state(d) & IRQD_LEVEL;
 }
 
+/*
+ * Must only be called of irqchip.irq_set_affinity() or low level
+ * hieararchy domain allocation functions.
+ */
+static inline void irqd_set_single_target(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_SINGLE_TARGET;
+}
+
+static inline bool irqd_is_single_target(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_SINGLE_TARGET;
+}
+
 static inline bool irqd_is_wakeup_set(struct irq_data *d)
 {
 	return __irqd_to_state(d) & IRQD_WAKEUP_STATE;
@@ -329,6 +357,16 @@ static inline void irqd_clr_activated(struct irq_data *d)
 	__irqd_to_state(d) &= ~IRQD_ACTIVATED;
 }
 
+static inline bool irqd_is_started(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_IRQ_STARTED;
+}
+
+static inline bool irqd_is_managed_and_shutdown(struct irq_data *d)
+{
+	return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN;
+}
+
 #undef __irqd_to_state
 
 static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
@@ -478,14 +516,21 @@ extern int irq_set_affinity_locked(struct irq_data *data,
 				   const struct cpumask *cpumask, bool force);
 extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION)
 extern void irq_migrate_all_off_this_cpu(void);
+extern int irq_affinity_online_cpu(unsigned int cpu);
+#else
+# define irq_affinity_online_cpu	NULL
+#endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
 void irq_move_masked_irq(struct irq_data *data);
+void irq_force_complete_move(struct irq_desc *desc);
 #else
 static inline void irq_move_irq(struct irq_data *data) { }
 static inline void irq_move_masked_irq(struct irq_data *data) { }
+static inline void irq_force_complete_move(struct irq_desc *desc) { }
 #endif
 
 extern int no_irq_affinity;
@@ -727,6 +772,29 @@ static inline struct cpumask *irq_data_get_affinity_mask(struct irq_data *d)
 	return d->common->affinity;
 }
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->effective_affinity;
+}
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+	cpumask_copy(d->common->effective_affinity, m);
+}
+#else
+static inline void irq_data_update_effective_affinity(struct irq_data *d,
+						      const struct cpumask *m)
+{
+}
+static inline
+struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
+{
+	return d->common->affinity;
+}
+#endif
+
 unsigned int arch_dynirq_lower_bound(unsigned int from);
 
 int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
@@ -951,6 +1019,14 @@ int irq_setup_alt_chip(struct irq_data *d, unsigned int type);
 void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk,
 			     unsigned int clr, unsigned int set);
 
+struct irq_chip_generic *
+devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
+			    unsigned int irq_base, void __iomem *reg_base,
+			    irq_flow_handler_t handler);
+int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
+				u32 msk, enum irq_gc_flags flags,
+				unsigned int clr, unsigned int set);
+
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
 
 int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
@@ -967,6 +1043,19 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
 					 handler, clr, set, flags);	\
 })
 
+static inline void irq_free_generic_chip(struct irq_chip_generic *gc)
+{
+	kfree(gc);
+}
+
+static inline void irq_destroy_generic_chip(struct irq_chip_generic *gc,
+					    u32 msk, unsigned int clr,
+					    unsigned int set)
+{
+	irq_remove_generic_chip(gc, msk, clr, set);
+	irq_free_generic_chip(gc);
+}
+
 static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 {
 	return container_of(d->chip, struct irq_chip_type, chip);

+ 4 - 0
include/linux/irqdesc.h

@@ -46,6 +46,7 @@ struct pt_regs;
  * @rcu:		rcu head for delayed free
  * @kobj:		kobject used to represent this struct in sysfs
  * @dir:		/proc/irq/ procfs entry
+ * @debugfs_file:	dentry for the debugfs file
  * @name:		flow handler name for /proc/interrupts output
  */
 struct irq_desc {
@@ -88,6 +89,9 @@ struct irq_desc {
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*dir;
 #endif
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	struct dentry		*debugfs_file;
+#endif
 #ifdef CONFIG_SPARSE_IRQ
 	struct rcu_head		rcu;
 	struct kobject		kobj;

+ 39 - 4
include/linux/irqdomain.h

@@ -130,6 +130,7 @@ struct irq_domain_chip_generic;
  * @host_data: private data pointer for use by owner.  Not touched by irq_domain
  *             core code.
  * @flags: host per irq_domain flags
+ * @mapcount: The number of mapped interrupts
  *
  * Optional elements
  * @of_node: Pointer to device tree nodes associated with the irq_domain. Used
@@ -138,6 +139,7 @@ struct irq_domain_chip_generic;
  *      setting up one or more generic chips for interrupt controllers
  *      drivers using the generic chip library which uses this pointer.
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
+ * @debugfs_file: dentry for the domain debugfs file
  *
  * Revmap data, used internally by irq_domain
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
@@ -152,6 +154,7 @@ struct irq_domain {
 	const struct irq_domain_ops *ops;
 	void *host_data;
 	unsigned int flags;
+	unsigned int mapcount;
 
 	/* Optional data */
 	struct fwnode_handle *fwnode;
@@ -160,6 +163,9 @@ struct irq_domain {
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 	struct irq_domain *parent;
 #endif
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	struct dentry		*debugfs_file;
+#endif
 
 	/* reverse map data. The linear map gets appended to the irq_domain */
 	irq_hw_number_t hwirq_max;
@@ -174,8 +180,8 @@ enum {
 	/* Irq domain is hierarchical */
 	IRQ_DOMAIN_FLAG_HIERARCHY	= (1 << 0),
 
-	/* Core calls alloc/free recursive through the domain hierarchy. */
-	IRQ_DOMAIN_FLAG_AUTO_RECURSIVE	= (1 << 1),
+	/* Irq domain name was allocated in __irq_domain_add() */
+	IRQ_DOMAIN_NAME_ALLOCATED	= (1 << 6),
 
 	/* Irq domain is an IPI domain with virq per cpu */
 	IRQ_DOMAIN_FLAG_IPI_PER_CPU	= (1 << 2),
@@ -203,7 +209,33 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d)
 }
 
 #ifdef CONFIG_IRQ_DOMAIN
-struct fwnode_handle *irq_domain_alloc_fwnode(void *data);
+struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+						const char *name, void *data);
+
+enum {
+	IRQCHIP_FWNODE_REAL,
+	IRQCHIP_FWNODE_NAMED,
+	IRQCHIP_FWNODE_NAMED_ID,
+};
+
+static inline
+struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL);
+}
+
+static inline
+struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name,
+					 NULL);
+}
+
+static inline struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+{
+	return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, data);
+}
+
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
 struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
@@ -238,6 +270,9 @@ static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode)
 	return fwnode && fwnode->type == FWNODE_IRQCHIP;
 }
 
+extern void irq_domain_update_bus_token(struct irq_domain *domain,
+					enum irq_domain_bus_token bus_token);
+
 static inline
 struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode,
 					    enum irq_domain_bus_token bus_token)
@@ -410,7 +445,7 @@ static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
 				       NULL);
 }
 
-extern int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+extern int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 					   unsigned int irq_base,
 					   unsigned int nr_irqs, void *arg);
 extern int irq_domain_set_hwirq_and_chip(struct irq_domain *domain,

+ 5 - 0
kernel/cpu.c

@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
 		.startup.single		= smpboot_unpark_threads,
 		.teardown.single	= NULL,
 	},
+	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
+		.name			= "irq/affinity:online",
+		.startup.single		= irq_affinity_online_cpu,
+		.teardown.single	= NULL,
+	},
 	[CPUHP_AP_PERF_ONLINE] = {
 		.name			= "perf:online",
 		.startup.single		= perf_event_init_cpu,

+ 18 - 0
kernel/irq/Kconfig

@@ -21,6 +21,10 @@ config GENERIC_IRQ_SHOW
 config GENERIC_IRQ_SHOW_LEVEL
        bool
 
+# Supports effective affinity mask
+config GENERIC_IRQ_EFFECTIVE_AFF_MASK
+       bool
+
 # Facility to allocate a hardware interrupt. This is legacy support
 # and should not be used in new code. Use irq domains instead.
 config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
@@ -81,6 +85,9 @@ config GENERIC_MSI_IRQ_DOMAIN
 config HANDLE_DOMAIN_IRQ
 	bool
 
+config IRQ_TIMINGS
+	bool
+
 config IRQ_DOMAIN_DEBUG
 	bool "Expose hardware/virtual IRQ mapping via debugfs"
 	depends on IRQ_DOMAIN && DEBUG_FS
@@ -108,4 +115,15 @@ config SPARSE_IRQ
 
 	  If you don't know what to do here, say N.
 
+config GENERIC_IRQ_DEBUGFS
+	bool "Expose irq internals in debugfs"
+	depends on DEBUG_FS
+	default n
+	---help---
+
+	  Exposes internal state information through debugfs. Mostly for
+	  developers and debugging of hard to diagnose interrupt problems.
+
+	  If you don't know what to do here, say N.
+
 endmenu

+ 2 - 0
kernel/irq/Makefile

@@ -1,5 +1,6 @@
 
 obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
+obj-$(CONFIG_IRQ_TIMINGS) += timings.o
 obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
@@ -10,3 +11,4 @@ obj-$(CONFIG_PM_SLEEP) += pm.o
 obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
 obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
 obj-$(CONFIG_SMP) += affinity.o
+obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o

+ 63 - 13
kernel/irq/affinity.c

@@ -1,4 +1,7 @@
-
+/*
+ * Copyright (C) 2016 Thomas Gleixner.
+ * Copyright (C) 2016-2017 Christoph Hellwig.
+ */
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -35,13 +38,54 @@ static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
 	}
 }
 
-static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
+static cpumask_var_t *alloc_node_to_present_cpumask(void)
+{
+	cpumask_var_t *masks;
+	int node;
+
+	masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
+	if (!masks)
+		return NULL;
+
+	for (node = 0; node < nr_node_ids; node++) {
+		if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
+			goto out_unwind;
+	}
+
+	return masks;
+
+out_unwind:
+	while (--node >= 0)
+		free_cpumask_var(masks[node]);
+	kfree(masks);
+	return NULL;
+}
+
+static void free_node_to_present_cpumask(cpumask_var_t *masks)
+{
+	int node;
+
+	for (node = 0; node < nr_node_ids; node++)
+		free_cpumask_var(masks[node]);
+	kfree(masks);
+}
+
+static void build_node_to_present_cpumask(cpumask_var_t *masks)
+{
+	int cpu;
+
+	for_each_present_cpu(cpu)
+		cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
+}
+
+static int get_nodes_in_cpumask(cpumask_var_t *node_to_present_cpumask,
+				const struct cpumask *mask, nodemask_t *nodemsk)
 {
 	int n, nodes = 0;
 
 	/* Calculate the number of nodes in the supplied affinity mask */
-	for_each_online_node(n) {
-		if (cpumask_intersects(mask, cpumask_of_node(n))) {
+	for_each_node(n) {
+		if (cpumask_intersects(mask, node_to_present_cpumask[n])) {
 			node_set(n, *nodemsk);
 			nodes++;
 		}
@@ -64,7 +108,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	int last_affv = affv + affd->pre_vectors;
 	nodemask_t nodemsk = NODE_MASK_NONE;
 	struct cpumask *masks;
-	cpumask_var_t nmsk;
+	cpumask_var_t nmsk, *node_to_present_cpumask;
 
 	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
 		return NULL;
@@ -73,13 +117,19 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	if (!masks)
 		goto out;
 
+	node_to_present_cpumask = alloc_node_to_present_cpumask();
+	if (!node_to_present_cpumask)
+		goto out;
+
 	/* Fill out vectors at the beginning that don't need affinity */
 	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
 		cpumask_copy(masks + curvec, irq_default_affinity);
 
 	/* Stabilize the cpumasks */
 	get_online_cpus();
-	nodes = get_nodes_in_cpumask(cpu_online_mask, &nodemsk);
+	build_node_to_present_cpumask(node_to_present_cpumask);
+	nodes = get_nodes_in_cpumask(node_to_present_cpumask, cpu_present_mask,
+				     &nodemsk);
 
 	/*
 	 * If the number of nodes in the mask is greater than or equal the
@@ -87,7 +137,8 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 	 */
 	if (affv <= nodes) {
 		for_each_node_mask(n, nodemsk) {
-			cpumask_copy(masks + curvec, cpumask_of_node(n));
+			cpumask_copy(masks + curvec,
+				     node_to_present_cpumask[n]);
 			if (++curvec == last_affv)
 				break;
 		}
@@ -101,7 +152,7 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
 		vecs_per_node = (affv - (curvec - affd->pre_vectors)) / nodes;
 
 		/* Get the cpus on this node which are in the mask */
-		cpumask_and(nmsk, cpu_online_mask, cpumask_of_node(n));
+		cpumask_and(nmsk, cpu_present_mask, node_to_present_cpumask[n]);
 
 		/* Calculate the number of cpus per vector */
 		ncpus = cpumask_weight(nmsk);
@@ -133,6 +184,7 @@ done:
 	/* Fill out vectors at the end that don't need affinity */
 	for (; curvec < nvecs; curvec++)
 		cpumask_copy(masks + curvec, irq_default_affinity);
+	free_node_to_present_cpumask(node_to_present_cpumask);
 out:
 	free_cpumask_var(nmsk);
 	return masks;
@@ -147,12 +199,10 @@ int irq_calc_affinity_vectors(int maxvec, const struct irq_affinity *affd)
 {
 	int resv = affd->pre_vectors + affd->post_vectors;
 	int vecs = maxvec - resv;
-	int cpus;
+	int ret;
 
-	/* Stabilize the cpumasks */
 	get_online_cpus();
-	cpus = cpumask_weight(cpu_online_mask);
+	ret = min_t(int, cpumask_weight(cpu_present_mask), vecs) + resv;
 	put_online_cpus();
-
-	return min(cpus, vecs) + resv;
+	return ret;
 }

+ 2 - 2
kernel/irq/autoprobe.c

@@ -53,7 +53,7 @@ unsigned long probe_irq_on(void)
 			if (desc->irq_data.chip->irq_set_type)
 				desc->irq_data.chip->irq_set_type(&desc->irq_data,
 							 IRQ_TYPE_PROBE);
-			irq_startup(desc, false);
+			irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE);
 		}
 		raw_spin_unlock_irq(&desc->lock);
 	}
@@ -70,7 +70,7 @@ unsigned long probe_irq_on(void)
 		raw_spin_lock_irq(&desc->lock);
 		if (!desc->action && irq_settings_can_probe(desc)) {
 			desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
-			if (irq_startup(desc, false))
+			if (irq_startup(desc, IRQ_NORESEND, IRQ_START_FORCE))
 				desc->istate |= IRQS_PENDING;
 		}
 		raw_spin_unlock_irq(&desc->lock);

+ 157 - 38
kernel/irq/chip.c

@@ -185,47 +185,162 @@ static void irq_state_set_masked(struct irq_desc *desc)
 	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-int irq_startup(struct irq_desc *desc, bool resend)
+static void irq_state_clr_started(struct irq_desc *desc)
 {
-	int ret = 0;
+	irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
+}
 
-	irq_state_clr_disabled(desc);
-	desc->depth = 0;
+static void irq_state_set_started(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_STARTED);
+}
+
+enum {
+	IRQ_STARTUP_NORMAL,
+	IRQ_STARTUP_MANAGED,
+	IRQ_STARTUP_ABORT,
+};
+
+#ifdef CONFIG_SMP
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+
+	if (!irqd_affinity_is_managed(d))
+		return IRQ_STARTUP_NORMAL;
+
+	irqd_clr_managed_shutdown(d);
 
-	irq_domain_activate_irq(&desc->irq_data);
-	if (desc->irq_data.chip->irq_startup) {
-		ret = desc->irq_data.chip->irq_startup(&desc->irq_data);
+	if (cpumask_any_and(aff, cpu_online_mask) > nr_cpu_ids) {
+		/*
+		 * Catch code which fiddles with enable_irq() on a managed
+		 * and potentially shutdown IRQ. Chained interrupt
+		 * installment or irq auto probing should not happen on
+		 * managed irqs either. Emit a warning, break the affinity
+		 * and start it up as a normal interrupt.
+		 */
+		if (WARN_ON_ONCE(force))
+			return IRQ_STARTUP_NORMAL;
+		/*
+		 * The interrupt was requested, but there is no online CPU
+		 * in it's affinity mask. Put it into managed shutdown
+		 * state and let the cpu hotplug mechanism start it up once
+		 * a CPU in the mask becomes available.
+		 */
+		irqd_set_managed_shutdown(d);
+		return IRQ_STARTUP_ABORT;
+	}
+	return IRQ_STARTUP_MANAGED;
+}
+#else
+static int
+__irq_startup_managed(struct irq_desc *desc, struct cpumask *aff, bool force)
+{
+	return IRQ_STARTUP_NORMAL;
+}
+#endif
+
+static int __irq_startup(struct irq_desc *desc)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	int ret = 0;
+
+	irq_domain_activate_irq(d);
+	if (d->chip->irq_startup) {
+		ret = d->chip->irq_startup(d);
+		irq_state_clr_disabled(desc);
 		irq_state_clr_masked(desc);
 	} else {
 		irq_enable(desc);
 	}
+	irq_state_set_started(desc);
+	return ret;
+}
+
+int irq_startup(struct irq_desc *desc, bool resend, bool force)
+{
+	struct irq_data *d = irq_desc_get_irq_data(desc);
+	struct cpumask *aff = irq_data_get_affinity_mask(d);
+	int ret = 0;
+
+	desc->depth = 0;
+
+	if (irqd_is_started(d)) {
+		irq_enable(desc);
+	} else {
+		switch (__irq_startup_managed(desc, aff, force)) {
+		case IRQ_STARTUP_NORMAL:
+			ret = __irq_startup(desc);
+			irq_setup_affinity(desc);
+			break;
+		case IRQ_STARTUP_MANAGED:
+			ret = __irq_startup(desc);
+			irq_set_affinity_locked(d, aff, false);
+			break;
+		case IRQ_STARTUP_ABORT:
+			return 0;
+		}
+	}
 	if (resend)
 		check_irq_resend(desc);
+
 	return ret;
 }
 
+static void __irq_disable(struct irq_desc *desc, bool mask);
+
 void irq_shutdown(struct irq_desc *desc)
 {
-	irq_state_set_disabled(desc);
-	desc->depth = 1;
-	if (desc->irq_data.chip->irq_shutdown)
-		desc->irq_data.chip->irq_shutdown(&desc->irq_data);
-	else if (desc->irq_data.chip->irq_disable)
-		desc->irq_data.chip->irq_disable(&desc->irq_data);
-	else
-		desc->irq_data.chip->irq_mask(&desc->irq_data);
+	if (irqd_is_started(&desc->irq_data)) {
+		desc->depth = 1;
+		if (desc->irq_data.chip->irq_shutdown) {
+			desc->irq_data.chip->irq_shutdown(&desc->irq_data);
+			irq_state_set_disabled(desc);
+			irq_state_set_masked(desc);
+		} else {
+			__irq_disable(desc, true);
+		}
+		irq_state_clr_started(desc);
+	}
+	/*
+	 * This must be called even if the interrupt was never started up,
+	 * because the activation can happen before the interrupt is
+	 * available for request/startup. It has it's own state tracking so
+	 * it's safe to call it unconditionally.
+	 */
 	irq_domain_deactivate_irq(&desc->irq_data);
-	irq_state_set_masked(desc);
 }
 
 void irq_enable(struct irq_desc *desc)
 {
-	irq_state_clr_disabled(desc);
-	if (desc->irq_data.chip->irq_enable)
-		desc->irq_data.chip->irq_enable(&desc->irq_data);
-	else
-		desc->irq_data.chip->irq_unmask(&desc->irq_data);
-	irq_state_clr_masked(desc);
+	if (!irqd_irq_disabled(&desc->irq_data)) {
+		unmask_irq(desc);
+	} else {
+		irq_state_clr_disabled(desc);
+		if (desc->irq_data.chip->irq_enable) {
+			desc->irq_data.chip->irq_enable(&desc->irq_data);
+			irq_state_clr_masked(desc);
+		} else {
+			unmask_irq(desc);
+		}
+	}
+}
+
+static void __irq_disable(struct irq_desc *desc, bool mask)
+{
+	if (irqd_irq_disabled(&desc->irq_data)) {
+		if (mask)
+			mask_irq(desc);
+	} else {
+		irq_state_set_disabled(desc);
+		if (desc->irq_data.chip->irq_disable) {
+			desc->irq_data.chip->irq_disable(&desc->irq_data);
+			irq_state_set_masked(desc);
+		} else if (mask) {
+			mask_irq(desc);
+		}
+	}
 }
 
 /**
@@ -250,13 +365,7 @@ void irq_enable(struct irq_desc *desc)
  */
 void irq_disable(struct irq_desc *desc)
 {
-	irq_state_set_disabled(desc);
-	if (desc->irq_data.chip->irq_disable) {
-		desc->irq_data.chip->irq_disable(&desc->irq_data);
-		irq_state_set_masked(desc);
-	} else if (irq_settings_disable_unlazy(desc)) {
-		mask_irq(desc);
-	}
+	__irq_disable(desc, irq_settings_disable_unlazy(desc));
 }
 
 void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu)
@@ -279,18 +388,21 @@ void irq_percpu_disable(struct irq_desc *desc, unsigned int cpu)
 
 static inline void mask_ack_irq(struct irq_desc *desc)
 {
-	if (desc->irq_data.chip->irq_mask_ack)
+	if (desc->irq_data.chip->irq_mask_ack) {
 		desc->irq_data.chip->irq_mask_ack(&desc->irq_data);
-	else {
-		desc->irq_data.chip->irq_mask(&desc->irq_data);
+		irq_state_set_masked(desc);
+	} else {
+		mask_irq(desc);
 		if (desc->irq_data.chip->irq_ack)
 			desc->irq_data.chip->irq_ack(&desc->irq_data);
 	}
-	irq_state_set_masked(desc);
 }
 
 void mask_irq(struct irq_desc *desc)
 {
+	if (irqd_irq_masked(&desc->irq_data))
+		return;
+
 	if (desc->irq_data.chip->irq_mask) {
 		desc->irq_data.chip->irq_mask(&desc->irq_data);
 		irq_state_set_masked(desc);
@@ -299,6 +411,9 @@ void mask_irq(struct irq_desc *desc)
 
 void unmask_irq(struct irq_desc *desc)
 {
+	if (!irqd_irq_masked(&desc->irq_data))
+		return;
+
 	if (desc->irq_data.chip->irq_unmask) {
 		desc->irq_data.chip->irq_unmask(&desc->irq_data);
 		irq_state_clr_masked(desc);
@@ -312,10 +427,7 @@ void unmask_threaded_irq(struct irq_desc *desc)
 	if (chip->flags & IRQCHIP_EOI_THREADED)
 		chip->irq_eoi(&desc->irq_data);
 
-	if (chip->irq_unmask) {
-		chip->irq_unmask(&desc->irq_data);
-		irq_state_clr_masked(desc);
-	}
+	unmask_irq(desc);
 }
 
 /*
@@ -851,7 +963,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
 		desc->action = &chained_action;
-		irq_startup(desc, true);
+		irq_startup(desc, IRQ_RESEND, IRQ_START_FORCE);
 	}
 }
 
@@ -903,6 +1015,13 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
 
 	if (!desc)
 		return;
+
+	/*
+	 * Warn when a driver sets the no autoenable flag on an already
+	 * active interrupt.
+	 */
+	WARN_ON_ONCE(!desc->depth && (set & _IRQ_NOAUTOEN));
+
 	irq_settings_clr_and_set(desc, clr, set);
 
 	irqd_clear(&desc->irq_data, IRQD_NO_BALANCING | IRQD_PER_CPU |

+ 126 - 24
kernel/irq/cpuhotplug.c

@@ -14,37 +14,99 @@
 
 #include "internals.h"
 
+/* For !GENERIC_IRQ_EFFECTIVE_AFF_MASK this looks at general affinity mask */
+static inline bool irq_needs_fixup(struct irq_data *d)
+{
+	const struct cpumask *m = irq_data_get_effective_affinity_mask(d);
+
+	return cpumask_test_cpu(smp_processor_id(), m);
+}
+
 static bool migrate_one_irq(struct irq_desc *desc)
 {
 	struct irq_data *d = irq_desc_get_irq_data(desc);
-	const struct cpumask *affinity = d->common->affinity;
-	struct irq_chip *c;
-	bool ret = false;
+	struct irq_chip *chip = irq_data_get_irq_chip(d);
+	bool maskchip = !irq_can_move_pcntxt(d) && !irqd_irq_masked(d);
+	const struct cpumask *affinity;
+	bool brokeaff = false;
+	int err;
 
 	/*
-	 * If this is a per-CPU interrupt, or the affinity does not
-	 * include this CPU, then we have nothing to do.
+	 * IRQ chip might be already torn down, but the irq descriptor is
+	 * still in the radix tree. Also if the chip has no affinity setter,
+	 * nothing can be done here.
 	 */
-	if (irqd_is_per_cpu(d) ||
-	    !cpumask_test_cpu(smp_processor_id(), affinity))
+	if (!chip || !chip->irq_set_affinity) {
+		pr_debug("IRQ %u: Unable to migrate away\n", d->irq);
 		return false;
+	}
+
+	/*
+	 * No move required, if:
+	 * - Interrupt is per cpu
+	 * - Interrupt is not started
+	 * - Affinity mask does not include this CPU.
+	 *
+	 * Note: Do not check desc->action as this might be a chained
+	 * interrupt.
+	 */
+	if (irqd_is_per_cpu(d) || !irqd_is_started(d) || !irq_needs_fixup(d)) {
+		/*
+		 * If an irq move is pending, abort it if the dying CPU is
+		 * the sole target.
+		 */
+		irq_fixup_move_pending(desc, false);
+		return false;
+	}
+
+	/*
+	 * Complete an eventually pending irq move cleanup. If this
+	 * interrupt was moved in hard irq context, then the vectors need
+	 * to be cleaned up. It can't wait until this interrupt actually
+	 * happens and this CPU was involved.
+	 */
+	irq_force_complete_move(desc);
+
+	/*
+	 * If there is a setaffinity pending, then try to reuse the pending
+	 * mask, so the last change of the affinity does not get lost. If
+	 * there is no move pending or the pending mask does not contain
+	 * any online CPU, use the current affinity mask.
+	 */
+	if (irq_fixup_move_pending(desc, true))
+		affinity = irq_desc_get_pending_mask(desc);
+	else
+		affinity = irq_data_get_affinity_mask(d);
+
+	/* Mask the chip for interrupts which cannot move in process context */
+	if (maskchip && chip->irq_mask)
+		chip->irq_mask(d);
 
 	if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+		/*
+		 * If the interrupt is managed, then shut it down and leave
+		 * the affinity untouched.
+		 */
+		if (irqd_affinity_is_managed(d)) {
+			irqd_set_managed_shutdown(d);
+			irq_shutdown(desc);
+			return false;
+		}
 		affinity = cpu_online_mask;
-		ret = true;
+		brokeaff = true;
 	}
 
-	c = irq_data_get_irq_chip(d);
-	if (!c->irq_set_affinity) {
-		pr_debug("IRQ%u: unable to set affinity\n", d->irq);
-	} else {
-		int r = irq_do_set_affinity(d, affinity, false);
-		if (r)
-			pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
-					    d->irq, r);
+	err = irq_do_set_affinity(d, affinity, true);
+	if (err) {
+		pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
+				    d->irq, err);
+		brokeaff = false;
 	}
 
-	return ret;
+	if (maskchip && chip->irq_unmask)
+		chip->irq_unmask(d);
+
+	return brokeaff;
 }
 
 /**
@@ -59,11 +121,8 @@ static bool migrate_one_irq(struct irq_desc *desc)
  */
 void irq_migrate_all_off_this_cpu(void)
 {
-	unsigned int irq;
 	struct irq_desc *desc;
-	unsigned long flags;
-
-	local_irq_save(flags);
+	unsigned int irq;
 
 	for_each_active_irq(irq) {
 		bool affinity_broken;
@@ -73,10 +132,53 @@ void irq_migrate_all_off_this_cpu(void)
 		affinity_broken = migrate_one_irq(desc);
 		raw_spin_unlock(&desc->lock);
 
-		if (affinity_broken)
-			pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n",
+		if (affinity_broken) {
+			pr_warn_ratelimited("IRQ %u: no longer affine to CPU%u\n",
 					    irq, smp_processor_id());
+		}
+	}
+}
+
+static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	const struct cpumask *affinity = irq_data_get_affinity_mask(data);
+
+	if (!irqd_affinity_is_managed(data) || !desc->action ||
+	    !irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
+		return;
+
+	if (irqd_is_managed_and_shutdown(data)) {
+		irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+		return;
+	}
+
+	/*
+	 * If the interrupt can only be directed to a single target
+	 * CPU then it is already assigned to a CPU in the affinity
+	 * mask. No point in trying to move it around.
+	 */
+	if (!irqd_is_single_target(data))
+		irq_set_affinity_locked(data, affinity, false);
+}
+
+/**
+ * irq_affinity_online_cpu - Restore affinity for managed interrupts
+ * @cpu:	Upcoming CPU for which interrupts should be restored
+ */
+int irq_affinity_online_cpu(unsigned int cpu)
+{
+	struct irq_desc *desc;
+	unsigned int irq;
+
+	irq_lock_sparse();
+	for_each_active_irq(irq) {
+		desc = irq_to_desc(irq);
+		raw_spin_lock_irq(&desc->lock);
+		irq_restore_affinity_of_irq(desc, cpu);
+		raw_spin_unlock_irq(&desc->lock);
 	}
+	irq_unlock_sparse();
 
-	local_irq_restore(flags);
+	return 0;
 }

+ 213 - 0
kernel/irq/debugfs.c

@@ -0,0 +1,213 @@
+/*
+ * Copyright 2017 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * This file is licensed under the GPL V2.
+ */
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+#include "internals.h"
+
+static struct dentry *irq_dir;
+
+struct irq_bit_descr {
+	unsigned int	mask;
+	char		*name;
+};
+#define BIT_MASK_DESCR(m)	{ .mask = m, .name = #m }
+
+static void irq_debug_show_bits(struct seq_file *m, int ind, unsigned int state,
+				const struct irq_bit_descr *sd, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++, sd++) {
+		if (state & sd->mask)
+			seq_printf(m, "%*s%s\n", ind + 12, "", sd->name);
+	}
+}
+
+#ifdef CONFIG_SMP
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+	struct cpumask *msk;
+
+	msk = irq_data_get_affinity_mask(data);
+	seq_printf(m, "affinity: %*pbl\n", cpumask_pr_args(msk));
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	msk = irq_data_get_effective_affinity_mask(data);
+	seq_printf(m, "effectiv: %*pbl\n", cpumask_pr_args(msk));
+#endif
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+	msk = desc->pending_mask;
+	seq_printf(m, "pending:  %*pbl\n", cpumask_pr_args(msk));
+#endif
+}
+#else
+static void irq_debug_show_masks(struct seq_file *m, struct irq_desc *desc) { }
+#endif
+
+static const struct irq_bit_descr irqchip_flags[] = {
+	BIT_MASK_DESCR(IRQCHIP_SET_TYPE_MASKED),
+	BIT_MASK_DESCR(IRQCHIP_EOI_IF_HANDLED),
+	BIT_MASK_DESCR(IRQCHIP_MASK_ON_SUSPEND),
+	BIT_MASK_DESCR(IRQCHIP_ONOFFLINE_ENABLED),
+	BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
+	BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
+	BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
+};
+
+static void
+irq_debug_show_chip(struct seq_file *m, struct irq_data *data, int ind)
+{
+	struct irq_chip *chip = data->chip;
+
+	if (!chip) {
+		seq_printf(m, "chip: None\n");
+		return;
+	}
+	seq_printf(m, "%*schip:    %s\n", ind, "", chip->name);
+	seq_printf(m, "%*sflags:   0x%lx\n", ind + 1, "", chip->flags);
+	irq_debug_show_bits(m, ind, chip->flags, irqchip_flags,
+			    ARRAY_SIZE(irqchip_flags));
+}
+
+static void
+irq_debug_show_data(struct seq_file *m, struct irq_data *data, int ind)
+{
+	seq_printf(m, "%*sdomain:  %s\n", ind, "",
+		   data->domain ? data->domain->name : "");
+	seq_printf(m, "%*shwirq:   0x%lx\n", ind + 1, "", data->hwirq);
+	irq_debug_show_chip(m, data, ind + 1);
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	if (!data->parent_data)
+		return;
+	seq_printf(m, "%*sparent:\n", ind + 1, "");
+	irq_debug_show_data(m, data->parent_data, ind + 4);
+#endif
+}
+
+static const struct irq_bit_descr irqdata_states[] = {
+	BIT_MASK_DESCR(IRQ_TYPE_EDGE_RISING),
+	BIT_MASK_DESCR(IRQ_TYPE_EDGE_FALLING),
+	BIT_MASK_DESCR(IRQ_TYPE_LEVEL_HIGH),
+	BIT_MASK_DESCR(IRQ_TYPE_LEVEL_LOW),
+	BIT_MASK_DESCR(IRQD_LEVEL),
+
+	BIT_MASK_DESCR(IRQD_ACTIVATED),
+	BIT_MASK_DESCR(IRQD_IRQ_STARTED),
+	BIT_MASK_DESCR(IRQD_IRQ_DISABLED),
+	BIT_MASK_DESCR(IRQD_IRQ_MASKED),
+	BIT_MASK_DESCR(IRQD_IRQ_INPROGRESS),
+
+	BIT_MASK_DESCR(IRQD_PER_CPU),
+	BIT_MASK_DESCR(IRQD_NO_BALANCING),
+
+	BIT_MASK_DESCR(IRQD_SINGLE_TARGET),
+	BIT_MASK_DESCR(IRQD_MOVE_PCNTXT),
+	BIT_MASK_DESCR(IRQD_AFFINITY_SET),
+	BIT_MASK_DESCR(IRQD_SETAFFINITY_PENDING),
+	BIT_MASK_DESCR(IRQD_AFFINITY_MANAGED),
+	BIT_MASK_DESCR(IRQD_MANAGED_SHUTDOWN),
+
+	BIT_MASK_DESCR(IRQD_FORWARDED_TO_VCPU),
+
+	BIT_MASK_DESCR(IRQD_WAKEUP_STATE),
+	BIT_MASK_DESCR(IRQD_WAKEUP_ARMED),
+};
+
+static const struct irq_bit_descr irqdesc_states[] = {
+	BIT_MASK_DESCR(_IRQ_NOPROBE),
+	BIT_MASK_DESCR(_IRQ_NOREQUEST),
+	BIT_MASK_DESCR(_IRQ_NOTHREAD),
+	BIT_MASK_DESCR(_IRQ_NOAUTOEN),
+	BIT_MASK_DESCR(_IRQ_NESTED_THREAD),
+	BIT_MASK_DESCR(_IRQ_PER_CPU_DEVID),
+	BIT_MASK_DESCR(_IRQ_IS_POLLED),
+	BIT_MASK_DESCR(_IRQ_DISABLE_UNLAZY),
+};
+
+static const struct irq_bit_descr irqdesc_istates[] = {
+	BIT_MASK_DESCR(IRQS_AUTODETECT),
+	BIT_MASK_DESCR(IRQS_SPURIOUS_DISABLED),
+	BIT_MASK_DESCR(IRQS_POLL_INPROGRESS),
+	BIT_MASK_DESCR(IRQS_ONESHOT),
+	BIT_MASK_DESCR(IRQS_REPLAY),
+	BIT_MASK_DESCR(IRQS_WAITING),
+	BIT_MASK_DESCR(IRQS_PENDING),
+	BIT_MASK_DESCR(IRQS_SUSPENDED),
+};
+
+
+static int irq_debug_show(struct seq_file *m, void *p)
+{
+	struct irq_desc *desc = m->private;
+	struct irq_data *data;
+
+	raw_spin_lock_irq(&desc->lock);
+	data = irq_desc_get_irq_data(desc);
+	seq_printf(m, "handler:  %pf\n", desc->handle_irq);
+	seq_printf(m, "status:   0x%08x\n", desc->status_use_accessors);
+	irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states,
+			    ARRAY_SIZE(irqdesc_states));
+	seq_printf(m, "istate:   0x%08x\n", desc->istate);
+	irq_debug_show_bits(m, 0, desc->istate, irqdesc_istates,
+			    ARRAY_SIZE(irqdesc_istates));
+	seq_printf(m, "ddepth:   %u\n", desc->depth);
+	seq_printf(m, "wdepth:   %u\n", desc->wake_depth);
+	seq_printf(m, "dstate:   0x%08x\n", irqd_get(data));
+	irq_debug_show_bits(m, 0, irqd_get(data), irqdata_states,
+			    ARRAY_SIZE(irqdata_states));
+	seq_printf(m, "node:     %d\n", irq_data_get_node(data));
+	irq_debug_show_masks(m, desc);
+	irq_debug_show_data(m, data, 0);
+	raw_spin_unlock_irq(&desc->lock);
+	return 0;
+}
+
+static int irq_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_irq_ops = {
+	.open		= irq_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc)
+{
+	char name [10];
+
+	if (!irq_dir || !desc || desc->debugfs_file)
+		return;
+
+	sprintf(name, "%d", irq);
+	desc->debugfs_file = debugfs_create_file(name, 0444, irq_dir, desc,
+						 &dfs_irq_ops);
+}
+
+static int __init irq_debugfs_init(void)
+{
+	struct dentry *root_dir;
+	int irq;
+
+	root_dir = debugfs_create_dir("irq", NULL);
+	if (!root_dir)
+		return -ENOMEM;
+
+	irq_domain_debugfs_init(root_dir);
+
+	irq_dir = debugfs_create_dir("irqs", root_dir);
+
+	irq_lock_sparse();
+	for_each_active_irq(irq)
+		irq_add_debugfs_entry(irq, irq_to_desc(irq));
+	irq_unlock_sparse();
+
+	return 0;
+}
+__initcall(irq_debugfs_init);

+ 86 - 0
kernel/irq/devres.c

@@ -4,6 +4,8 @@
 #include <linux/gfp.h>
 #include <linux/irq.h>
 
+#include "internals.h"
+
 /*
  * Device resource management aware IRQ request/free implementation.
  */
@@ -198,3 +200,87 @@ int __devm_irq_alloc_descs(struct device *dev, int irq, unsigned int from,
 	return base;
 }
 EXPORT_SYMBOL_GPL(__devm_irq_alloc_descs);
+
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+/**
+ * devm_irq_alloc_generic_chip - Allocate and initialize a generic chip
+ *                               for a managed device
+ * @dev:	Device to allocate the generic chip for
+ * @name:	Name of the irq chip
+ * @num_ct:	Number of irq_chip_type instances associated with this
+ * @irq_base:	Interrupt base nr for this chip
+ * @reg_base:	Register base address (virtual)
+ * @handler:	Default flow handler associated with this chip
+ *
+ * Returns an initialized irq_chip_generic structure. The chip defaults
+ * to the primary (index 0) irq_chip_type and @handler
+ */
+struct irq_chip_generic *
+devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
+			    unsigned int irq_base, void __iomem *reg_base,
+			    irq_flow_handler_t handler)
+{
+	struct irq_chip_generic *gc;
+	unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
+
+	gc = devm_kzalloc(dev, sz, GFP_KERNEL);
+	if (gc)
+		irq_init_generic_chip(gc, name, num_ct,
+				      irq_base, reg_base, handler);
+
+	return gc;
+}
+EXPORT_SYMBOL_GPL(devm_irq_alloc_generic_chip);
+
+struct irq_generic_chip_devres {
+	struct irq_chip_generic *gc;
+	u32 msk;
+	unsigned int clr;
+	unsigned int set;
+};
+
+static void devm_irq_remove_generic_chip(struct device *dev, void *res)
+{
+	struct irq_generic_chip_devres *this = res;
+
+	irq_remove_generic_chip(this->gc, this->msk, this->clr, this->set);
+}
+
+/**
+ * devm_irq_setup_generic_chip - Setup a range of interrupts with a generic
+ *                               chip for a managed device
+ *
+ * @dev:	Device to setup the generic chip for
+ * @gc:		Generic irq chip holding all data
+ * @msk:	Bitmask holding the irqs to initialize relative to gc->irq_base
+ * @flags:	Flags for initialization
+ * @clr:	IRQ_* bits to clear
+ * @set:	IRQ_* bits to set
+ *
+ * Set up max. 32 interrupts starting from gc->irq_base. Note, this
+ * initializes all interrupts to the primary irq_chip_type and its
+ * associated handler.
+ */
+int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc,
+				u32 msk, enum irq_gc_flags flags,
+				unsigned int clr, unsigned int set)
+{
+	struct irq_generic_chip_devres *dr;
+
+	dr = devres_alloc(devm_irq_remove_generic_chip,
+			  sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	irq_setup_generic_chip(gc, msk, flags, clr, set);
+
+	dr->gc = gc;
+	dr->msk = msk;
+	dr->clr = clr;
+	dr->set = set;
+	devres_add(dev, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip);
+#endif /* CONFIG_GENERIC_IRQ_CHIP */

+ 3 - 4
kernel/irq/generic-chip.c

@@ -201,10 +201,9 @@ static void irq_writel_be(u32 val, void __iomem *addr)
 	iowrite32be(val, addr);
 }
 
-static void
-irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
-		      int num_ct, unsigned int irq_base,
-		      void __iomem *reg_base, irq_flow_handler_t handler)
+void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+			   int num_ct, unsigned int irq_base,
+			   void __iomem *reg_base, irq_flow_handler_t handler)
 {
 	raw_spin_lock_init(&gc->lock);
 	gc->num_ct = num_ct;

+ 2 - 0
kernel/irq/handle.c

@@ -138,6 +138,8 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
 	unsigned int irq = desc->irq_data.irq;
 	struct irqaction *action;
 
+	record_irq_time(desc);
+
 	for_each_action_of_desc(desc, action) {
 		irqreturn_t res;
 

+ 223 - 2
kernel/irq/internals.h

@@ -8,6 +8,7 @@
 #include <linux/irqdesc.h>
 #include <linux/kernel_stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/sched/clock.h>
 
 #ifdef CONFIG_SPARSE_IRQ
 # define IRQ_BITMAP_BITS	(NR_IRQS + 8196)
@@ -57,6 +58,7 @@ enum {
 	IRQS_WAITING		= 0x00000080,
 	IRQS_PENDING		= 0x00000200,
 	IRQS_SUSPENDED		= 0x00000800,
+	IRQS_TIMINGS		= 0x00001000,
 };
 
 #include "debug.h"
@@ -66,7 +68,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned long flags);
 extern void __disable_irq(struct irq_desc *desc);
 extern void __enable_irq(struct irq_desc *desc);
 
-extern int irq_startup(struct irq_desc *desc, bool resend);
+#define IRQ_RESEND	true
+#define IRQ_NORESEND	false
+
+#define IRQ_START_FORCE	true
+#define IRQ_START_COND	false
+
+extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
+
 extern void irq_shutdown(struct irq_desc *desc);
 extern void irq_enable(struct irq_desc *desc);
 extern void irq_disable(struct irq_desc *desc);
@@ -109,13 +118,19 @@ static inline void unregister_handler_proc(unsigned int irq,
 
 extern bool irq_can_set_affinity_usr(unsigned int irq);
 
-extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
+extern int irq_select_affinity_usr(unsigned int irq);
 
 extern void irq_set_thread_affinity(struct irq_desc *desc);
 
 extern int irq_do_set_affinity(struct irq_data *data,
 			       const struct cpumask *dest, bool force);
 
+#ifdef CONFIG_SMP
+extern int irq_setup_affinity(struct irq_desc *desc);
+#else
+static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; }
+#endif
+
 /* Inline functions for support of irq chips on slow busses */
 static inline void chip_bus_lock(struct irq_desc *desc)
 {
@@ -169,6 +184,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
 
 #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)
 
+static inline unsigned int irqd_get(struct irq_data *d)
+{
+	return __irqd_to_state(d);
+}
+
 /*
  * Manipulation functions for irq_data.state
  */
@@ -182,6 +202,16 @@ static inline void irqd_clr_move_pending(struct irq_data *d)
 	__irqd_to_state(d) &= ~IRQD_SETAFFINITY_PENDING;
 }
 
+static inline void irqd_set_managed_shutdown(struct irq_data *d)
+{
+	__irqd_to_state(d) |= IRQD_MANAGED_SHUTDOWN;
+}
+
+static inline void irqd_clr_managed_shutdown(struct irq_data *d)
+{
+	__irqd_to_state(d) &= ~IRQD_MANAGED_SHUTDOWN;
+}
+
 static inline void irqd_clear(struct irq_data *d, unsigned int mask)
 {
 	__irqd_to_state(d) &= ~mask;
@@ -226,3 +256,194 @@ irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { }
 static inline void
 irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { }
 #endif
+
+#ifdef CONFIG_IRQ_TIMINGS
+
+#define IRQ_TIMINGS_SHIFT	5
+#define IRQ_TIMINGS_SIZE	(1 << IRQ_TIMINGS_SHIFT)
+#define IRQ_TIMINGS_MASK	(IRQ_TIMINGS_SIZE - 1)
+
+/**
+ * struct irq_timings - irq timings storing structure
+ * @values: a circular buffer of u64 encoded <timestamp,irq> values
+ * @count: the number of elements in the array
+ */
+struct irq_timings {
+	u64	values[IRQ_TIMINGS_SIZE];
+	int	count;
+};
+
+DECLARE_PER_CPU(struct irq_timings, irq_timings);
+
+extern void irq_timings_free(int irq);
+extern int irq_timings_alloc(int irq);
+
+static inline void irq_remove_timings(struct irq_desc *desc)
+{
+	desc->istate &= ~IRQS_TIMINGS;
+
+	irq_timings_free(irq_desc_get_irq(desc));
+}
+
+static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act)
+{
+	int irq = irq_desc_get_irq(desc);
+	int ret;
+
+	/*
+	 * We don't need the measurement because the idle code already
+	 * knows the next expiry event.
+	 */
+	if (act->flags & __IRQF_TIMER)
+		return;
+
+	/*
+	 * In case the timing allocation fails, we just want to warn,
+	 * not fail, so letting the system boot anyway.
+	 */
+	ret = irq_timings_alloc(irq);
+	if (ret) {
+		pr_warn("Failed to allocate irq timing stats for irq%d (%d)",
+			irq, ret);
+		return;
+	}
+
+	desc->istate |= IRQS_TIMINGS;
+}
+
+extern void irq_timings_enable(void);
+extern void irq_timings_disable(void);
+
+DECLARE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+/*
+ * The interrupt number and the timestamp are encoded into a single
+ * u64 variable to optimize the size.
+ * 48 bit time stamp and 16 bit IRQ number is way sufficient.
+ *  Who cares an IRQ after 78 hours of idle time?
+ */
+static inline u64 irq_timing_encode(u64 timestamp, int irq)
+{
+	return (timestamp << 16) | irq;
+}
+
+static inline int irq_timing_decode(u64 value, u64 *timestamp)
+{
+	*timestamp = value >> 16;
+	return value & U16_MAX;
+}
+
+/*
+ * The function record_irq_time is only called in one place in the
+ * interrupts handler. We want this function always inline so the code
+ * inside is embedded in the function and the static key branching
+ * code can act at the higher level. Without the explicit
+ * __always_inline we can end up with a function call and a small
+ * overhead in the hotpath for nothing.
+ */
+static __always_inline void record_irq_time(struct irq_desc *desc)
+{
+	if (!static_branch_likely(&irq_timing_enabled))
+		return;
+
+	if (desc->istate & IRQS_TIMINGS) {
+		struct irq_timings *timings = this_cpu_ptr(&irq_timings);
+
+		timings->values[timings->count & IRQ_TIMINGS_MASK] =
+			irq_timing_encode(local_clock(),
+					  irq_desc_get_irq(desc));
+
+		timings->count++;
+	}
+}
+#else
+static inline void irq_remove_timings(struct irq_desc *desc) {}
+static inline void irq_setup_timings(struct irq_desc *desc,
+				     struct irqaction *act) {};
+static inline void record_irq_time(struct irq_desc *desc) {}
+#endif /* CONFIG_IRQ_TIMINGS */
+
+
+#ifdef CONFIG_GENERIC_IRQ_CHIP
+void irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+			   int num_ct, unsigned int irq_base,
+			   void __iomem *reg_base, irq_flow_handler_t handler);
+#else
+static inline void
+irq_init_generic_chip(struct irq_chip_generic *gc, const char *name,
+		      int num_ct, unsigned int irq_base,
+		      void __iomem *reg_base, irq_flow_handler_t handler) { }
+#endif /* CONFIG_GENERIC_IRQ_CHIP */
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
+{
+	return irqd_can_move_in_process_context(data);
+}
+static inline bool irq_move_pending(struct irq_data *data)
+{
+	return irqd_is_setaffinity_pending(data);
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+	cpumask_copy(desc->pending_mask, mask);
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+	cpumask_copy(mask, desc->pending_mask);
+}
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+	return desc->pending_mask;
+}
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear);
+#else /* CONFIG_GENERIC_PENDING_IRQ */
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
+{
+	return true;
+}
+static inline bool irq_move_pending(struct irq_data *data)
+{
+	return false;
+}
+static inline void
+irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
+{
+}
+static inline void
+irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
+{
+}
+static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc)
+{
+	return NULL;
+}
+static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear)
+{
+	return false;
+}
+#endif /* !CONFIG_GENERIC_PENDING_IRQ */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+#include <linux/debugfs.h>
+
+void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *desc);
+static inline void irq_remove_debugfs_entry(struct irq_desc *desc)
+{
+	debugfs_remove(desc->debugfs_file);
+}
+# ifdef CONFIG_IRQ_DOMAIN
+void irq_domain_debugfs_init(struct dentry *root);
+# else
+static inline void irq_domain_debugfs_init(struct dentry *root);
+# endif
+#else /* CONFIG_GENERIC_IRQ_DEBUGFS */
+static inline void irq_add_debugfs_entry(unsigned int irq, struct irq_desc *d)
+{
+}
+static inline void irq_remove_debugfs_entry(struct irq_desc *d)
+{
+}
+#endif /* CONFIG_GENERIC_IRQ_DEBUGFS */

+ 26 - 10
kernel/irq/irqdesc.c

@@ -54,14 +54,25 @@ static void __init init_irq_default_affinity(void)
 #endif
 
 #ifdef CONFIG_SMP
-static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
+static int alloc_masks(struct irq_desc *desc, int node)
 {
 	if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
-				     gfp, node))
+				     GFP_KERNEL, node))
 		return -ENOMEM;
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	if (!zalloc_cpumask_var_node(&desc->irq_common_data.effective_affinity,
+				     GFP_KERNEL, node)) {
+		free_cpumask_var(desc->irq_common_data.affinity);
+		return -ENOMEM;
+	}
+#endif
+
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
+	if (!zalloc_cpumask_var_node(&desc->pending_mask, GFP_KERNEL, node)) {
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+		free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
 		free_cpumask_var(desc->irq_common_data.affinity);
 		return -ENOMEM;
 	}
@@ -86,7 +97,7 @@ static void desc_smp_init(struct irq_desc *desc, int node,
 
 #else
 static inline int
-alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) { return 0; }
+alloc_masks(struct irq_desc *desc, int node) { return 0; }
 static inline void
 desc_smp_init(struct irq_desc *desc, int node, const struct cpumask *affinity) { }
 #endif
@@ -105,6 +116,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 	desc->irq_data.chip_data = NULL;
 	irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
 	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
+	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
 	desc->handle_irq = handle_bad_irq;
 	desc->depth = 1;
 	desc->irq_count = 0;
@@ -324,6 +336,9 @@ static void free_masks(struct irq_desc *desc)
 	free_cpumask_var(desc->pending_mask);
 #endif
 	free_cpumask_var(desc->irq_common_data.affinity);
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	free_cpumask_var(desc->irq_common_data.effective_affinity);
+#endif
 }
 #else
 static inline void free_masks(struct irq_desc *desc) { }
@@ -344,9 +359,8 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 				   struct module *owner)
 {
 	struct irq_desc *desc;
-	gfp_t gfp = GFP_KERNEL;
 
-	desc = kzalloc_node(sizeof(*desc), gfp, node);
+	desc = kzalloc_node(sizeof(*desc), GFP_KERNEL, node);
 	if (!desc)
 		return NULL;
 	/* allocate based on nr_cpu_ids */
@@ -354,7 +368,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 	if (!desc->kstat_irqs)
 		goto err_desc;
 
-	if (alloc_masks(desc, gfp, node))
+	if (alloc_masks(desc, node))
 		goto err_kstat;
 
 	raw_spin_lock_init(&desc->lock);
@@ -394,6 +408,7 @@ static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
+	irq_remove_debugfs_entry(desc);
 	unregister_irq_proc(irq, desc);
 
 	/*
@@ -480,7 +495,8 @@ int __init early_irq_init(void)
 
 	/* Let arch update nr_irqs and return the nr of preallocated irqs */
 	initcnt = arch_probe_nr_irqs();
-	printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d %d\n", NR_IRQS, nr_irqs, initcnt);
+	printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
+	       NR_IRQS, nr_irqs, initcnt);
 
 	if (WARN_ON(nr_irqs > IRQ_BITMAP_BITS))
 		nr_irqs = IRQ_BITMAP_BITS;
@@ -516,14 +532,14 @@ int __init early_irq_init(void)
 
 	init_irq_default_affinity();
 
-	printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
+	printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
 
 	desc = irq_desc;
 	count = ARRAY_SIZE(irq_desc);
 
 	for (i = 0; i < count; i++) {
 		desc[i].kstat_irqs = alloc_percpu(unsigned int);
-		alloc_masks(&desc[i], GFP_KERNEL, node);
+		alloc_masks(&desc[i], node);
 		raw_spin_lock_init(&desc[i].lock);
 		lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
 		desc_set_defaults(i, &desc[i], node, NULL, NULL);

+ 279 - 80
kernel/irq/irqdomain.c

@@ -26,39 +26,69 @@ static struct irq_domain *irq_default_domain;
 static void irq_domain_check_hierarchy(struct irq_domain *domain);
 
 struct irqchip_fwid {
-	struct fwnode_handle fwnode;
-	char *name;
+	struct fwnode_handle	fwnode;
+	unsigned int		type;
+	char			*name;
 	void *data;
 };
 
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static void debugfs_add_domain_dir(struct irq_domain *d);
+static void debugfs_remove_domain_dir(struct irq_domain *d);
+#else
+static inline void debugfs_add_domain_dir(struct irq_domain *d) { }
+static inline void debugfs_remove_domain_dir(struct irq_domain *d) { }
+#endif
+
 /**
  * irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for
  *                           identifying an irq domain
- * @data: optional user-provided data
+ * @type:	Type of irqchip_fwnode. See linux/irqdomain.h
+ * @name:	Optional user provided domain name
+ * @id:		Optional user provided id if name != NULL
+ * @data:	Optional user-provided data
  *
- * Allocate a struct device_node, and return a poiner to the embedded
+ * Allocate a struct irqchip_fwid, and return a poiner to the embedded
  * fwnode_handle (or NULL on failure).
+ *
+ * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
+ * solely to transport name information to irqdomain creation code. The
+ * node is not stored. For other types the pointer is kept in the irq
+ * domain struct.
  */
-struct fwnode_handle *irq_domain_alloc_fwnode(void *data)
+struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id,
+						const char *name, void *data)
 {
 	struct irqchip_fwid *fwid;
-	char *name;
+	char *n;
 
 	fwid = kzalloc(sizeof(*fwid), GFP_KERNEL);
-	name = kasprintf(GFP_KERNEL, "irqchip@%p", data);
 
-	if (!fwid || !name) {
+	switch (type) {
+	case IRQCHIP_FWNODE_NAMED:
+		n = kasprintf(GFP_KERNEL, "%s", name);
+		break;
+	case IRQCHIP_FWNODE_NAMED_ID:
+		n = kasprintf(GFP_KERNEL, "%s-%d", name, id);
+		break;
+	default:
+		n = kasprintf(GFP_KERNEL, "irqchip@%p", data);
+		break;
+	}
+
+	if (!fwid || !n) {
 		kfree(fwid);
-		kfree(name);
+		kfree(n);
 		return NULL;
 	}
 
-	fwid->name = name;
+	fwid->type = type;
+	fwid->name = n;
 	fwid->data = data;
 	fwid->fwnode.type = FWNODE_IRQCHIP;
 	return &fwid->fwnode;
 }
-EXPORT_SYMBOL_GPL(irq_domain_alloc_fwnode);
+EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode);
 
 /**
  * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle
@@ -97,26 +127,82 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
 				    void *host_data)
 {
 	struct device_node *of_node = to_of_node(fwnode);
+	struct irqchip_fwid *fwid;
 	struct irq_domain *domain;
 
+	static atomic_t unknown_domains;
+
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(of_node));
 	if (WARN_ON(!domain))
 		return NULL;
 
+	if (fwnode && is_fwnode_irqchip(fwnode)) {
+		fwid = container_of(fwnode, struct irqchip_fwid, fwnode);
+
+		switch (fwid->type) {
+		case IRQCHIP_FWNODE_NAMED:
+		case IRQCHIP_FWNODE_NAMED_ID:
+			domain->name = kstrdup(fwid->name, GFP_KERNEL);
+			if (!domain->name) {
+				kfree(domain);
+				return NULL;
+			}
+			domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+			break;
+		default:
+			domain->fwnode = fwnode;
+			domain->name = fwid->name;
+			break;
+		}
+	} else if (of_node) {
+		char *name;
+
+		/*
+		 * DT paths contain '/', which debugfs is legitimately
+		 * unhappy about. Replace them with ':', which does
+		 * the trick and is not as offensive as '\'...
+		 */
+		name = kstrdup(of_node_full_name(of_node), GFP_KERNEL);
+		if (!name) {
+			kfree(domain);
+			return NULL;
+		}
+
+		strreplace(name, '/', ':');
+
+		domain->name = name;
+		domain->fwnode = fwnode;
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+	}
+
+	if (!domain->name) {
+		if (fwnode) {
+			pr_err("Invalid fwnode type (%d) for irqdomain\n",
+			       fwnode->type);
+		}
+		domain->name = kasprintf(GFP_KERNEL, "unknown-%d",
+					 atomic_inc_return(&unknown_domains));
+		if (!domain->name) {
+			kfree(domain);
+			return NULL;
+		}
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+	}
+
 	of_node_get(of_node);
 
 	/* Fill structure */
 	INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
 	domain->ops = ops;
 	domain->host_data = host_data;
-	domain->fwnode = fwnode;
 	domain->hwirq_max = hwirq_max;
 	domain->revmap_size = size;
 	domain->revmap_direct_max_irq = direct_max;
 	irq_domain_check_hierarchy(domain);
 
 	mutex_lock(&irq_domain_mutex);
+	debugfs_add_domain_dir(domain);
 	list_add(&domain->link, &irq_domain_list);
 	mutex_unlock(&irq_domain_mutex);
 
@@ -136,6 +222,7 @@ EXPORT_SYMBOL_GPL(__irq_domain_add);
 void irq_domain_remove(struct irq_domain *domain)
 {
 	mutex_lock(&irq_domain_mutex);
+	debugfs_remove_domain_dir(domain);
 
 	WARN_ON(!radix_tree_empty(&domain->revmap_tree));
 
@@ -152,10 +239,43 @@ void irq_domain_remove(struct irq_domain *domain)
 	pr_debug("Removed domain %s\n", domain->name);
 
 	of_node_put(irq_domain_get_of_node(domain));
+	if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+		kfree(domain->name);
 	kfree(domain);
 }
 EXPORT_SYMBOL_GPL(irq_domain_remove);
 
+void irq_domain_update_bus_token(struct irq_domain *domain,
+				 enum irq_domain_bus_token bus_token)
+{
+	char *name;
+
+	if (domain->bus_token == bus_token)
+		return;
+
+	mutex_lock(&irq_domain_mutex);
+
+	domain->bus_token = bus_token;
+
+	name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token);
+	if (!name) {
+		mutex_unlock(&irq_domain_mutex);
+		return;
+	}
+
+	debugfs_remove_domain_dir(domain);
+
+	if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
+		kfree(domain->name);
+	else
+		domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED;
+
+	domain->name = name;
+	debugfs_add_domain_dir(domain);
+
+	mutex_unlock(&irq_domain_mutex);
+}
+
 /**
  * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
  * @of_node: pointer to interrupt controller's device tree node.
@@ -344,6 +464,7 @@ void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
 
 	irq_data->domain = NULL;
 	irq_data->hwirq = 0;
+	domain->mapcount--;
 
 	/* Clear reverse map for this hwirq */
 	if (hwirq < domain->revmap_size) {
@@ -395,6 +516,7 @@ int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
 			domain->name = irq_data->chip->name;
 	}
 
+	domain->mapcount++;
 	if (hwirq < domain->revmap_size) {
 		domain->linear_revmap[hwirq] = virq;
 	} else {
@@ -746,13 +868,54 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
 EXPORT_SYMBOL_GPL(irq_find_mapping);
 
 #ifdef CONFIG_IRQ_DOMAIN_DEBUG
+static void virq_debug_show_one(struct seq_file *m, struct irq_desc *desc)
+{
+	struct irq_domain *domain;
+	struct irq_data *data;
+
+	domain = desc->irq_data.domain;
+	data = &desc->irq_data;
+
+	while (domain) {
+		unsigned int irq = data->irq;
+		unsigned long hwirq = data->hwirq;
+		struct irq_chip *chip;
+		bool direct;
+
+		if (data == &desc->irq_data)
+			seq_printf(m, "%5d  ", irq);
+		else
+			seq_printf(m, "%5d+ ", irq);
+		seq_printf(m, "0x%05lx  ", hwirq);
+
+		chip = irq_data_get_irq_chip(data);
+		seq_printf(m, "%-15s  ", (chip && chip->name) ? chip->name : "none");
+
+		seq_printf(m, data ? "0x%p  " : "  %p  ",
+			   irq_data_get_irq_chip_data(data));
+
+		seq_printf(m, "   %c    ", (desc->action && desc->action->handler) ? '*' : ' ');
+		direct = (irq == hwirq) && (irq < domain->revmap_direct_max_irq);
+		seq_printf(m, "%6s%-8s  ",
+			   (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX",
+			   direct ? "(DIRECT)" : "");
+		seq_printf(m, "%s\n", domain->name);
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+		domain = domain->parent;
+		data = data->parent_data;
+#else
+		domain = NULL;
+#endif
+	}
+}
+
 static int virq_debug_show(struct seq_file *m, void *private)
 {
 	unsigned long flags;
 	struct irq_desc *desc;
 	struct irq_domain *domain;
 	struct radix_tree_iter iter;
-	void *data, **slot;
+	void **slot;
 	int i;
 
 	seq_printf(m, " %-16s  %-6s  %-10s  %-10s  %s\n",
@@ -760,15 +923,26 @@ static int virq_debug_show(struct seq_file *m, void *private)
 	mutex_lock(&irq_domain_mutex);
 	list_for_each_entry(domain, &irq_domain_list, link) {
 		struct device_node *of_node;
+		const char *name;
+
 		int count = 0;
+
 		of_node = irq_domain_get_of_node(domain);
+		if (of_node)
+			name = of_node_full_name(of_node);
+		else if (is_fwnode_irqchip(domain->fwnode))
+			name = container_of(domain->fwnode, struct irqchip_fwid,
+					    fwnode)->name;
+		else
+			name = "";
+
 		radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0)
 			count++;
 		seq_printf(m, "%c%-16s  %6u  %10u  %10u  %s\n",
 			   domain == irq_default_domain ? '*' : ' ', domain->name,
 			   domain->revmap_size + count, domain->revmap_size,
 			   domain->revmap_direct_max_irq,
-			   of_node ? of_node_full_name(of_node) : "");
+			   name);
 	}
 	mutex_unlock(&irq_domain_mutex);
 
@@ -782,30 +956,7 @@ static int virq_debug_show(struct seq_file *m, void *private)
 			continue;
 
 		raw_spin_lock_irqsave(&desc->lock, flags);
-		domain = desc->irq_data.domain;
-
-		if (domain) {
-			struct irq_chip *chip;
-			int hwirq = desc->irq_data.hwirq;
-			bool direct;
-
-			seq_printf(m, "%5d  ", i);
-			seq_printf(m, "0x%05x  ", hwirq);
-
-			chip = irq_desc_get_chip(desc);
-			seq_printf(m, "%-15s  ", (chip && chip->name) ? chip->name : "none");
-
-			data = irq_desc_get_chip_data(desc);
-			seq_printf(m, data ? "0x%p  " : "  %p  ", data);
-
-			seq_printf(m, "   %c    ", (desc->action && desc->action->handler) ? '*' : ' ');
-			direct = (i == hwirq) && (i < domain->revmap_direct_max_irq);
-			seq_printf(m, "%6s%-8s  ",
-				   (hwirq < domain->revmap_size) ? "LINEAR" : "RADIX",
-				   direct ? "(DIRECT)" : "");
-			seq_printf(m, "%s\n", desc->irq_data.domain->name);
-		}
-
+		virq_debug_show_one(m, desc);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 
@@ -973,6 +1124,7 @@ static void irq_domain_insert_irq(int virq)
 		struct irq_domain *domain = data->domain;
 		irq_hw_number_t hwirq = data->hwirq;
 
+		domain->mapcount++;
 		if (hwirq < domain->revmap_size) {
 			domain->linear_revmap[hwirq] = virq;
 		} else {
@@ -1002,6 +1154,7 @@ static void irq_domain_remove_irq(int virq)
 		struct irq_domain *domain = data->domain;
 		irq_hw_number_t hwirq = data->hwirq;
 
+		domain->mapcount--;
 		if (hwirq < domain->revmap_size) {
 			domain->linear_revmap[hwirq] = 0;
 		} else {
@@ -1189,43 +1342,18 @@ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq,
 	irq_domain_free_irqs_common(domain, virq, nr_irqs);
 }
 
-static bool irq_domain_is_auto_recursive(struct irq_domain *domain)
-{
-	return domain->flags & IRQ_DOMAIN_FLAG_AUTO_RECURSIVE;
-}
-
-static void irq_domain_free_irqs_recursive(struct irq_domain *domain,
+static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain,
 					   unsigned int irq_base,
 					   unsigned int nr_irqs)
 {
 	domain->ops->free(domain, irq_base, nr_irqs);
-	if (irq_domain_is_auto_recursive(domain)) {
-		BUG_ON(!domain->parent);
-		irq_domain_free_irqs_recursive(domain->parent, irq_base,
-					       nr_irqs);
-	}
 }
 
-int irq_domain_alloc_irqs_recursive(struct irq_domain *domain,
+int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
 				    unsigned int irq_base,
 				    unsigned int nr_irqs, void *arg)
 {
-	int ret = 0;
-	struct irq_domain *parent = domain->parent;
-	bool recursive = irq_domain_is_auto_recursive(domain);
-
-	BUG_ON(recursive && !parent);
-	if (recursive)
-		ret = irq_domain_alloc_irqs_recursive(parent, irq_base,
-						      nr_irqs, arg);
-	if (ret < 0)
-		return ret;
-
-	ret = domain->ops->alloc(domain, irq_base, nr_irqs, arg);
-	if (ret < 0 && recursive)
-		irq_domain_free_irqs_recursive(parent, irq_base, nr_irqs);
-
-	return ret;
+	return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
 }
 
 /**
@@ -1286,7 +1414,7 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
 	}
 
 	mutex_lock(&irq_domain_mutex);
-	ret = irq_domain_alloc_irqs_recursive(domain, virq, nr_irqs, arg);
+	ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
 	if (ret < 0) {
 		mutex_unlock(&irq_domain_mutex);
 		goto out_free_irq_data;
@@ -1321,7 +1449,7 @@ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs)
 	mutex_lock(&irq_domain_mutex);
 	for (i = 0; i < nr_irqs; i++)
 		irq_domain_remove_irq(virq + i);
-	irq_domain_free_irqs_recursive(data->domain, virq, nr_irqs);
+	irq_domain_free_irqs_hierarchy(data->domain, virq, nr_irqs);
 	mutex_unlock(&irq_domain_mutex);
 
 	irq_domain_free_irq_data(virq, nr_irqs);
@@ -1341,15 +1469,11 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
 				 unsigned int irq_base, unsigned int nr_irqs,
 				 void *arg)
 {
-	/* irq_domain_alloc_irqs_recursive() has called parent's alloc() */
-	if (irq_domain_is_auto_recursive(domain))
-		return 0;
+	if (!domain->parent)
+		return -ENOSYS;
 
-	domain = domain->parent;
-	if (domain)
-		return irq_domain_alloc_irqs_recursive(domain, irq_base,
-						       nr_irqs, arg);
-	return -ENOSYS;
+	return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base,
+					       nr_irqs, arg);
 }
 EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
 
@@ -1364,10 +1488,10 @@ EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);
 void irq_domain_free_irqs_parent(struct irq_domain *domain,
 				 unsigned int irq_base, unsigned int nr_irqs)
 {
-	/* irq_domain_free_irqs_recursive() will call parent's free */
-	if (!irq_domain_is_auto_recursive(domain) && domain->parent)
-		irq_domain_free_irqs_recursive(domain->parent, irq_base,
-					       nr_irqs);
+	if (!domain->parent)
+		return;
+
+	irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs);
 }
 EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);
 
@@ -1487,3 +1611,78 @@ static void irq_domain_check_hierarchy(struct irq_domain *domain)
 {
 }
 #endif	/* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+static struct dentry *domain_dir;
+
+static void
+irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind)
+{
+	seq_printf(m, "%*sname:   %s\n", ind, "", d->name);
+	seq_printf(m, "%*ssize:   %u\n", ind + 1, "",
+		   d->revmap_size + d->revmap_direct_max_irq);
+	seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount);
+	seq_printf(m, "%*sflags:  0x%08x\n", ind +1 , "", d->flags);
+#ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
+	if (!d->parent)
+		return;
+	seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name);
+	irq_domain_debug_show_one(m, d->parent, ind + 4);
+#endif
+}
+
+static int irq_domain_debug_show(struct seq_file *m, void *p)
+{
+	struct irq_domain *d = m->private;
+
+	/* Default domain? Might be NULL */
+	if (!d) {
+		if (!irq_default_domain)
+			return 0;
+		d = irq_default_domain;
+	}
+	irq_domain_debug_show_one(m, d, 0);
+	return 0;
+}
+
+static int irq_domain_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_domain_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_domain_ops = {
+	.open		= irq_domain_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void debugfs_add_domain_dir(struct irq_domain *d)
+{
+	if (!d->name || !domain_dir || d->debugfs_file)
+		return;
+	d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
+					      &dfs_domain_ops);
+}
+
+static void debugfs_remove_domain_dir(struct irq_domain *d)
+{
+	if (d->debugfs_file)
+		debugfs_remove(d->debugfs_file);
+}
+
+void __init irq_domain_debugfs_init(struct dentry *root)
+{
+	struct irq_domain *d;
+
+	domain_dir = debugfs_create_dir("domains", root);
+	if (!domain_dir)
+		return;
+
+	debugfs_create_file("default", 0444, domain_dir, NULL, &dfs_domain_ops);
+	mutex_lock(&irq_domain_mutex);
+	list_for_each_entry(d, &irq_domain_list, link)
+		debugfs_add_domain_dir(d);
+	mutex_unlock(&irq_domain_mutex);
+}
+#endif

+ 47 - 72
kernel/irq/manage.c

@@ -168,34 +168,6 @@ void irq_set_thread_affinity(struct irq_desc *desc)
 			set_bit(IRQTF_AFFINITY, &action->thread_flags);
 }
 
-#ifdef CONFIG_GENERIC_PENDING_IRQ
-static inline bool irq_can_move_pcntxt(struct irq_data *data)
-{
-	return irqd_can_move_in_process_context(data);
-}
-static inline bool irq_move_pending(struct irq_data *data)
-{
-	return irqd_is_setaffinity_pending(data);
-}
-static inline void
-irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
-{
-	cpumask_copy(desc->pending_mask, mask);
-}
-static inline void
-irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
-{
-	cpumask_copy(mask, desc->pending_mask);
-}
-#else
-static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; }
-static inline bool irq_move_pending(struct irq_data *data) { return false; }
-static inline void
-irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
-static inline void
-irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
-#endif
-
 int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			bool force)
 {
@@ -345,15 +317,18 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
 /*
  * Generic version of the affinity autoselector.
  */
-static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
+int irq_setup_affinity(struct irq_desc *desc)
 {
 	struct cpumask *set = irq_default_affinity;
-	int node = irq_desc_get_node(desc);
+	int ret, node = irq_desc_get_node(desc);
+	static DEFINE_RAW_SPINLOCK(mask_lock);
+	static struct cpumask mask;
 
 	/* Excludes PER_CPU and NO_BALANCE interrupts */
 	if (!__irq_can_set_affinity(desc))
 		return 0;
 
+	raw_spin_lock(&mask_lock);
 	/*
 	 * Preserve the managed affinity setting and a userspace affinity
 	 * setup, but make sure that one of the targets is online.
@@ -367,46 +342,40 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 			irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
 	}
 
-	cpumask_and(mask, cpu_online_mask, set);
+	cpumask_and(&mask, cpu_online_mask, set);
 	if (node != NUMA_NO_NODE) {
 		const struct cpumask *nodemask = cpumask_of_node(node);
 
 		/* make sure at least one of the cpus in nodemask is online */
-		if (cpumask_intersects(mask, nodemask))
-			cpumask_and(mask, mask, nodemask);
+		if (cpumask_intersects(&mask, nodemask))
+			cpumask_and(&mask, &mask, nodemask);
 	}
-	irq_do_set_affinity(&desc->irq_data, mask, false);
-	return 0;
+	ret = irq_do_set_affinity(&desc->irq_data, &mask, false);
+	raw_spin_unlock(&mask_lock);
+	return ret;
 }
 #else
 /* Wrapper for ALPHA specific affinity selector magic */
-static inline int setup_affinity(struct irq_desc *d, struct cpumask *mask)
+int irq_setup_affinity(struct irq_desc *desc)
 {
-	return irq_select_affinity(irq_desc_get_irq(d));
+	return irq_select_affinity(irq_desc_get_irq(desc));
 }
 #endif
 
 /*
- * Called when affinity is set via /proc/irq
+ * Called when a bogus affinity is set via /proc/irq
  */
-int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask)
+int irq_select_affinity_usr(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long flags;
 	int ret;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
-	ret = setup_affinity(desc, mask);
+	ret = irq_setup_affinity(desc);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
 }
-
-#else
-static inline int
-setup_affinity(struct irq_desc *desc, struct cpumask *mask)
-{
-	return 0;
-}
 #endif
 
 /**
@@ -533,9 +502,15 @@ void __enable_irq(struct irq_desc *desc)
 			goto err_out;
 		/* Prevent probing on this irq: */
 		irq_settings_set_noprobe(desc);
-		irq_enable(desc);
-		check_irq_resend(desc);
-		/* fall-through */
+		/*
+		 * Call irq_startup() not irq_enable() here because the
+		 * interrupt might be marked NOAUTOEN. So irq_startup()
+		 * needs to be invoked when it gets enabled the first
+		 * time. If it was already started up, then irq_startup()
+		 * will invoke irq_enable() under the hood.
+		 */
+		irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+		break;
 	}
 	default:
 		desc->depth--;
@@ -1122,7 +1097,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	struct irqaction *old, **old_ptr;
 	unsigned long flags, thread_mask = 0;
 	int ret, nested, shared = 0;
-	cpumask_var_t mask;
 
 	if (!desc)
 		return -EINVAL;
@@ -1181,11 +1155,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		}
 	}
 
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto out_thread;
-	}
-
 	/*
 	 * Drivers are often written to work w/o knowledge about the
 	 * underlying irq chip implementation, so a request for a
@@ -1250,7 +1219,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		 */
 		if (thread_mask == ~0UL) {
 			ret = -EBUSY;
-			goto out_mask;
+			goto out_unlock;
 		}
 		/*
 		 * The thread_mask for the action is or'ed to
@@ -1294,7 +1263,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		pr_err("Threaded irq requested with handler=NULL and !ONESHOT for irq %d\n",
 		       irq);
 		ret = -EINVAL;
-		goto out_mask;
+		goto out_unlock;
 	}
 
 	if (!shared) {
@@ -1302,7 +1271,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (ret) {
 			pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
 			       new->name, irq, desc->irq_data.chip->name);
-			goto out_mask;
+			goto out_unlock;
 		}
 
 		init_waitqueue_head(&desc->wait_for_threads);
@@ -1314,7 +1283,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 			if (ret) {
 				irq_release_resources(desc);
-				goto out_mask;
+				goto out_unlock;
 			}
 		}
 
@@ -1330,20 +1299,25 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (new->flags & IRQF_ONESHOT)
 			desc->istate |= IRQS_ONESHOT;
 
-		if (irq_settings_can_autoenable(desc))
-			irq_startup(desc, true);
-		else
-			/* Undo nested disables: */
-			desc->depth = 1;
-
 		/* Exclude IRQ from balancing if requested */
 		if (new->flags & IRQF_NOBALANCING) {
 			irq_settings_set_no_balancing(desc);
 			irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
 		}
 
-		/* Set default affinity mask once everything is setup */
-		setup_affinity(desc, mask);
+		if (irq_settings_can_autoenable(desc)) {
+			irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
+		} else {
+			/*
+			 * Shared interrupts do not go well with disabling
+			 * auto enable. The sharing interrupt might request
+			 * it while it's still disabled and then wait for
+			 * interrupts forever.
+			 */
+			WARN_ON_ONCE(new->flags & IRQF_SHARED);
+			/* Undo nested disables: */
+			desc->depth = 1;
+		}
 
 	} else if (new->flags & IRQF_TRIGGER_MASK) {
 		unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
@@ -1374,6 +1348,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
+	irq_setup_timings(desc, new);
+
 	/*
 	 * Strictly no need to wake it up, but hung_task complains
 	 * when no hard interrupt wakes the thread up.
@@ -1384,10 +1360,9 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		wake_up_process(new->secondary->thread);
 
 	register_irq_proc(irq, desc);
+	irq_add_debugfs_entry(irq, desc);
 	new->dir = NULL;
 	register_handler_proc(irq, new);
-	free_cpumask_var(mask);
-
 	return 0;
 
 mismatch:
@@ -1400,9 +1375,8 @@ mismatch:
 	}
 	ret = -EBUSY;
 
-out_mask:
+out_unlock:
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
-	free_cpumask_var(mask);
 
 out_thread:
 	if (new->thread) {
@@ -1502,6 +1476,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		irq_settings_clr_disable_unlazy(desc);
 		irq_shutdown(desc);
 		irq_release_resources(desc);
+		irq_remove_timings(desc);
 	}
 
 #ifdef CONFIG_SMP

+ 30 - 0
kernel/irq/migration.c

@@ -4,6 +4,36 @@
 
 #include "internals.h"
 
+/**
+ * irq_fixup_move_pending - Cleanup irq move pending from a dying CPU
+ * @desc:		Interrupt descpriptor to clean up
+ * @force_clear:	If set clear the move pending bit unconditionally.
+ *			If not set, clear it only when the dying CPU is the
+ *			last one in the pending mask.
+ *
+ * Returns true if the pending bit was set and the pending mask contains an
+ * online CPU other than the dying CPU.
+ */
+bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear)
+{
+	struct irq_data *data = irq_desc_get_irq_data(desc);
+
+	if (!irqd_is_setaffinity_pending(data))
+		return false;
+
+	/*
+	 * The outgoing CPU might be the last online target in a pending
+	 * interrupt move. If that's the case clear the pending move bit.
+	 */
+	if (cpumask_any_and(desc->pending_mask, cpu_online_mask) >= nr_cpu_ids) {
+		irqd_clr_move_pending(data);
+		return false;
+	}
+	if (force_clear)
+		irqd_clr_move_pending(data);
+	return true;
+}
+
 void irq_move_masked_irq(struct irq_data *idata)
 {
 	struct irq_desc *desc = irq_data_to_desc(idata);

+ 10 - 3
kernel/irq/msi.c

@@ -265,13 +265,20 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode,
 					 struct msi_domain_info *info,
 					 struct irq_domain *parent)
 {
+	struct irq_domain *domain;
+
 	if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS)
 		msi_domain_update_dom_ops(info);
 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
 		msi_domain_update_chip_ops(info);
 
-	return irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
-					   fwnode, &msi_domain_ops, info);
+	domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0,
+					     fwnode, &msi_domain_ops, info);
+
+	if (domain && !domain->name && info->chip)
+		domain->name = info->chip->name;
+
+	return domain;
 }
 
 int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
@@ -308,7 +315,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 
 		ops->set_desc(arg, desc);
 		/* Assumes the domain mutex is held! */
-		ret = irq_domain_alloc_irqs_recursive(domain, virq, 1, arg);
+		ret = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg);
 		if (ret)
 			break;
 

+ 94 - 16
kernel/irq/proc.c

@@ -37,19 +37,47 @@ static struct proc_dir_entry *root_irq_dir;
 
 #ifdef CONFIG_SMP
 
-static int show_irq_affinity(int type, struct seq_file *m, void *v)
+enum {
+	AFFINITY,
+	AFFINITY_LIST,
+	EFFECTIVE,
+	EFFECTIVE_LIST,
+};
+
+static int show_irq_affinity(int type, struct seq_file *m)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	const struct cpumask *mask = desc->irq_common_data.affinity;
+	const struct cpumask *mask;
 
+	switch (type) {
+	case AFFINITY:
+	case AFFINITY_LIST:
+		mask = desc->irq_common_data.affinity;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (irqd_is_setaffinity_pending(&desc->irq_data))
-		mask = desc->pending_mask;
+		if (irqd_is_setaffinity_pending(&desc->irq_data))
+			mask = desc->pending_mask;
 #endif
-	if (type)
+		break;
+	case EFFECTIVE:
+	case EFFECTIVE_LIST:
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+		mask = desc->irq_common_data.effective_affinity;
+		break;
+#else
+		return -EINVAL;
+#endif
+	};
+
+	switch (type) {
+	case AFFINITY_LIST:
+	case EFFECTIVE_LIST:
 		seq_printf(m, "%*pbl\n", cpumask_pr_args(mask));
-	else
+		break;
+	case AFFINITY:
+	case EFFECTIVE:
 		seq_printf(m, "%*pb\n", cpumask_pr_args(mask));
+		break;
+	}
 	return 0;
 }
 
@@ -80,12 +108,12 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
 int no_irq_affinity;
 static int irq_affinity_proc_show(struct seq_file *m, void *v)
 {
-	return show_irq_affinity(0, m, v);
+	return show_irq_affinity(AFFINITY, m);
 }
 
 static int irq_affinity_list_proc_show(struct seq_file *m, void *v)
 {
-	return show_irq_affinity(1, m, v);
+	return show_irq_affinity(AFFINITY_LIST, m);
 }
 
 
@@ -120,9 +148,11 @@ static ssize_t write_irq_affinity(int type, struct file *file,
 	 * one online CPU still has to be targeted.
 	 */
 	if (!cpumask_intersects(new_value, cpu_online_mask)) {
-		/* Special case for empty set - allow the architecture
-		   code to set default SMP affinity. */
-		err = irq_select_affinity_usr(irq, new_value) ? -EINVAL : count;
+		/*
+		 * Special case for empty set - allow the architecture code
+		 * to set default SMP affinity.
+		 */
+		err = irq_select_affinity_usr(irq) ? -EINVAL : count;
 	} else {
 		irq_set_affinity(irq, new_value);
 		err = count;
@@ -183,6 +213,44 @@ static const struct file_operations irq_affinity_list_proc_fops = {
 	.write		= irq_affinity_list_proc_write,
 };
 
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+static int irq_effective_aff_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(EFFECTIVE, m);
+}
+
+static int irq_effective_aff_list_proc_show(struct seq_file *m, void *v)
+{
+	return show_irq_affinity(EFFECTIVE_LIST, m);
+}
+
+static int irq_effective_aff_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_effective_aff_proc_show, PDE_DATA(inode));
+}
+
+static int irq_effective_aff_list_proc_open(struct inode *inode,
+					    struct file *file)
+{
+	return single_open(file, irq_effective_aff_list_proc_show,
+			   PDE_DATA(inode));
+}
+
+static const struct file_operations irq_effective_aff_proc_fops = {
+	.open		= irq_effective_aff_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations irq_effective_aff_list_proc_fops = {
+	.open		= irq_effective_aff_list_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity));
@@ -324,6 +392,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
 	static DEFINE_MUTEX(register_lock);
+	void __maybe_unused *irqp = (void *)(unsigned long) irq;
 	char name [MAX_NAMELEN];
 
 	if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
@@ -349,20 +418,25 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
 	proc_create_data("smp_affinity", 0644, desc->dir,
-			 &irq_affinity_proc_fops, (void *)(long)irq);
+			 &irq_affinity_proc_fops, irqp);
 
 	/* create /proc/irq/<irq>/affinity_hint */
 	proc_create_data("affinity_hint", 0444, desc->dir,
-			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
+			 &irq_affinity_hint_proc_fops, irqp);
 
 	/* create /proc/irq/<irq>/smp_affinity_list */
 	proc_create_data("smp_affinity_list", 0644, desc->dir,
-			 &irq_affinity_list_proc_fops, (void *)(long)irq);
+			 &irq_affinity_list_proc_fops, irqp);
 
 	proc_create_data("node", 0444, desc->dir,
-			 &irq_node_proc_fops, (void *)(long)irq);
+			 &irq_node_proc_fops, irqp);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	proc_create_data("effective_affinity", 0444, desc->dir,
+			 &irq_effective_aff_proc_fops, irqp);
+	proc_create_data("effective_affinity_list", 0444, desc->dir,
+			 &irq_effective_aff_list_proc_fops, irqp);
+# endif
 #endif
-
 	proc_create_data("spurious", 0444, desc->dir,
 			 &irq_spurious_proc_fops, (void *)(long)irq);
 
@@ -381,6 +455,10 @@ void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
 	remove_proc_entry("affinity_hint", desc->dir);
 	remove_proc_entry("smp_affinity_list", desc->dir);
 	remove_proc_entry("node", desc->dir);
+# ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
+	remove_proc_entry("effective_affinity", desc->dir);
+	remove_proc_entry("effective_affinity_list", desc->dir);
+# endif
 #endif
 	remove_proc_entry("spurious", desc->dir);
 

+ 369 - 0
kernel/irq/timings.c

@@ -0,0 +1,369 @@
+/*
+ * linux/kernel/irq/timings.c
+ *
+ * Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/static_key.h>
+#include <linux/interrupt.h>
+#include <linux/idr.h>
+#include <linux/irq.h>
+#include <linux/math64.h>
+
+#include <trace/events/irq.h>
+
+#include "internals.h"
+
+DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
+
+DEFINE_PER_CPU(struct irq_timings, irq_timings);
+
+struct irqt_stat {
+	u64	next_evt;
+	u64	last_ts;
+	u64	variance;
+	u32	avg;
+	u32	nr_samples;
+	int	anomalies;
+	int	valid;
+};
+
+static DEFINE_IDR(irqt_stats);
+
+void irq_timings_enable(void)
+{
+	static_branch_enable(&irq_timing_enabled);
+}
+
+void irq_timings_disable(void)
+{
+	static_branch_disable(&irq_timing_enabled);
+}
+
+/**
+ * irqs_update - update the irq timing statistics with a new timestamp
+ *
+ * @irqs: an irqt_stat struct pointer
+ * @ts: the new timestamp
+ *
+ * The statistics are computed online, in other words, the code is
+ * designed to compute the statistics on a stream of values rather
+ * than doing multiple passes on the values to compute the average,
+ * then the variance. The integer division introduces a loss of
+ * precision but with an acceptable error margin regarding the results
+ * we would have with the double floating precision: we are dealing
+ * with nanosec, so big numbers, consequently the mantisse is
+ * negligeable, especially when converting the time in usec
+ * afterwards.
+ *
+ * The computation happens at idle time. When the CPU is not idle, the
+ * interrupts' timestamps are stored in the circular buffer, when the
+ * CPU goes idle and this routine is called, all the buffer's values
+ * are injected in the statistical model continuying to extend the
+ * statistics from the previous busy-idle cycle.
+ *
+ * The observations showed a device will trigger a burst of periodic
+ * interrupts followed by one or two peaks of longer time, for
+ * instance when a SD card device flushes its cache, then the periodic
+ * intervals occur again. A one second inactivity period resets the
+ * stats, that gives us the certitude the statistical values won't
+ * exceed 1x10^9, thus the computation won't overflow.
+ *
+ * Basically, the purpose of the algorithm is to watch the periodic
+ * interrupts and eliminate the peaks.
+ *
+ * An interrupt is considered periodically stable if the interval of
+ * its occurences follow the normal distribution, thus the values
+ * comply with:
+ *
+ *      avg - 3 x stddev < value < avg + 3 x stddev
+ *
+ * Which can be simplified to:
+ *
+ *      -3 x stddev < value - avg < 3 x stddev
+ *
+ *      abs(value - avg) < 3 x stddev
+ *
+ * In order to save a costly square root computation, we use the
+ * variance. For the record, stddev = sqrt(variance). The equation
+ * above becomes:
+ *
+ *      abs(value - avg) < 3 x sqrt(variance)
+ *
+ * And finally we square it:
+ *
+ *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
+ *
+ *      (value - avg) x (value - avg) < 9 x variance
+ *
+ * Statistically speaking, any values out of this interval is
+ * considered as an anomaly and is discarded. However, a normal
+ * distribution appears when the number of samples is 30 (it is the
+ * rule of thumb in statistics, cf. "30 samples" on Internet). When
+ * there are three consecutive anomalies, the statistics are resetted.
+ *
+ */
+static void irqs_update(struct irqt_stat *irqs, u64 ts)
+{
+	u64 old_ts = irqs->last_ts;
+	u64 variance = 0;
+	u64 interval;
+	s64 diff;
+
+	/*
+	 * The timestamps are absolute time values, we need to compute
+	 * the timing interval between two interrupts.
+	 */
+	irqs->last_ts = ts;
+
+	/*
+	 * The interval type is u64 in order to deal with the same
+	 * type in our computation, that prevent mindfuck issues with
+	 * overflow, sign and division.
+	 */
+	interval = ts - old_ts;
+
+	/*
+	 * The interrupt triggered more than one second apart, that
+	 * ends the sequence as predictible for our purpose. In this
+	 * case, assume we have the beginning of a sequence and the
+	 * timestamp is the first value. As it is impossible to
+	 * predict anything at this point, return.
+	 *
+	 * Note the first timestamp of the sequence will always fall
+	 * in this test because the old_ts is zero. That is what we
+	 * want as we need another timestamp to compute an interval.
+	 */
+	if (interval >= NSEC_PER_SEC) {
+		memset(irqs, 0, sizeof(*irqs));
+		irqs->last_ts = ts;
+		return;
+	}
+
+	/*
+	 * Pre-compute the delta with the average as the result is
+	 * used several times in this function.
+	 */
+	diff = interval - irqs->avg;
+
+	/*
+	 * Increment the number of samples.
+	 */
+	irqs->nr_samples++;
+
+	/*
+	 * Online variance divided by the number of elements if there
+	 * is more than one sample.  Normally the formula is division
+	 * by nr_samples - 1 but we assume the number of element will be
+	 * more than 32 and dividing by 32 instead of 31 is enough
+	 * precise.
+	 */
+	if (likely(irqs->nr_samples > 1))
+		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
+
+	/*
+	 * The rule of thumb in statistics for the normal distribution
+	 * is having at least 30 samples in order to have the model to
+	 * apply. Values outside the interval are considered as an
+	 * anomaly.
+	 */
+	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
+		/*
+		 * After three consecutive anomalies, we reset the
+		 * stats as it is no longer stable enough.
+		 */
+		if (irqs->anomalies++ >= 3) {
+			memset(irqs, 0, sizeof(*irqs));
+			irqs->last_ts = ts;
+			return;
+		}
+	} else {
+		/*
+		 * The anomalies must be consecutives, so at this
+		 * point, we reset the anomalies counter.
+		 */
+		irqs->anomalies = 0;
+	}
+
+	/*
+	 * The interrupt is considered stable enough to try to predict
+	 * the next event on it.
+	 */
+	irqs->valid = 1;
+
+	/*
+	 * Online average algorithm:
+	 *
+	 *  new_average = average + ((value - average) / count)
+	 *
+	 * The variance computation depends on the new average
+	 * to be computed here first.
+	 *
+	 */
+	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
+
+	/*
+	 * Online variance algorithm:
+	 *
+	 *  new_variance = variance + (value - average) x (value - new_average)
+	 *
+	 * Warning: irqs->avg is updated with the line above, hence
+	 * 'interval - irqs->avg' is no longer equal to 'diff'
+	 */
+	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
+
+	/*
+	 * Update the next event
+	 */
+	irqs->next_evt = ts + irqs->avg;
+}
+
+/**
+ * irq_timings_next_event - Return when the next event is supposed to arrive
+ *
+ * During the last busy cycle, the number of interrupts is incremented
+ * and stored in the irq_timings structure. This information is
+ * necessary to:
+ *
+ * - know if the index in the table wrapped up:
+ *
+ *      If more than the array size interrupts happened during the
+ *      last busy/idle cycle, the index wrapped up and we have to
+ *      begin with the next element in the array which is the last one
+ *      in the sequence, otherwise it is a the index 0.
+ *
+ * - have an indication of the interrupts activity on this CPU
+ *   (eg. irq/sec)
+ *
+ * The values are 'consumed' after inserting in the statistical model,
+ * thus the count is reinitialized.
+ *
+ * The array of values **must** be browsed in the time direction, the
+ * timestamp must increase between an element and the next one.
+ *
+ * Returns a nanosec time based estimation of the earliest interrupt,
+ * U64_MAX otherwise.
+ */
+u64 irq_timings_next_event(u64 now)
+{
+	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+	struct irqt_stat *irqs;
+	struct irqt_stat __percpu *s;
+	u64 ts, next_evt = U64_MAX;
+	int i, irq = 0;
+
+	/*
+	 * This function must be called with the local irq disabled in
+	 * order to prevent the timings circular buffer to be updated
+	 * while we are reading it.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Number of elements in the circular buffer: If it happens it
+	 * was flushed before, then the number of elements could be
+	 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
+	 * otherwise the array size is used as we wrapped. The index
+	 * begins from zero when we did not wrap. That could be done
+	 * in a nicer way with the proper circular array structure
+	 * type but with the cost of extra computation in the
+	 * interrupt handler hot path. We choose efficiency.
+	 *
+	 * Inject measured irq/timestamp to the statistical model
+	 * while decrementing the counter because we consume the data
+	 * from our circular buffer.
+	 */
+	for (i = irqts->count & IRQ_TIMINGS_MASK,
+		     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
+	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+
+		irq = irq_timing_decode(irqts->values[i], &ts);
+
+		s = idr_find(&irqt_stats, irq);
+		if (s) {
+			irqs = this_cpu_ptr(s);
+			irqs_update(irqs, ts);
+		}
+	}
+
+	/*
+	 * Look in the list of interrupts' statistics, the earliest
+	 * next event.
+	 */
+	idr_for_each_entry(&irqt_stats, s, i) {
+
+		irqs = this_cpu_ptr(s);
+
+		if (!irqs->valid)
+			continue;
+
+		if (irqs->next_evt <= now) {
+			irq = i;
+			next_evt = now;
+
+			/*
+			 * This interrupt mustn't use in the future
+			 * until new events occur and update the
+			 * statistics.
+			 */
+			irqs->valid = 0;
+			break;
+		}
+
+		if (irqs->next_evt < next_evt) {
+			irq = i;
+			next_evt = irqs->next_evt;
+		}
+	}
+
+	return next_evt;
+}
+
+void irq_timings_free(int irq)
+{
+	struct irqt_stat __percpu *s;
+
+	s = idr_find(&irqt_stats, irq);
+	if (s) {
+		free_percpu(s);
+		idr_remove(&irqt_stats, irq);
+	}
+}
+
+int irq_timings_alloc(int irq)
+{
+	struct irqt_stat __percpu *s;
+	int id;
+
+	/*
+	 * Some platforms can have the same private interrupt per cpu,
+	 * so this function may be be called several times with the
+	 * same interrupt number. Just bail out in case the per cpu
+	 * stat structure is already allocated.
+	 */
+	s = idr_find(&irqt_stats, irq);
+	if (s)
+		return 0;
+
+	s = alloc_percpu(*s);
+	if (!s)
+		return -ENOMEM;
+
+	idr_preload(GFP_KERNEL);
+	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
+	idr_preload_end();
+
+	if (id < 0) {
+		free_percpu(s);
+		return id;
+	}
+
+	return 0;
+}