Browse Source

Merge tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu into next/soc

Merge "mvebu SoC changes for v3.16 (incremental #2)" from Jason Cooper <jason@lakedaemon.net>:

 - mvebu
    - fix coherency on big-endian in -next
    - hardware IO coherency
    - L2/PCIe deadlock workaround
    - small coherency cleanups

* tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu:
  ARM: mvebu: rename ll_get_cpuid() to ll_get_coherency_cpumask()
  ARM: mvebu: improve comments in coherency_ll.S
  ARM: mvebu: fix indentation of assembly instructions in coherency_ll.S
  ARM: mvebu: fix big endian booting after coherency code rework
  ARM: mvebu: coherency: fix registration of PCI bus notifier when !PCI
  ARM: mvebu: implement L2/PCIe deadlock workaround
  ARM: mvebu: use hardware I/O coherency also for PCI devices

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Arnd Bergmann 11 years ago
parent
commit
37f5f4e173
2 changed files with 106 additions and 33 deletions
  1. 57 5
      arch/arm/mach-mvebu/coherency.c
  2. 49 28
      arch/arm/mach-mvebu/coherency_ll.S

+ 57 - 5
arch/arm/mach-mvebu/coherency.c

@@ -29,8 +29,10 @@
 #include <linux/slab.h>
 #include <linux/mbus.h>
 #include <linux/clk.h>
+#include <linux/pci.h>
 #include <asm/smp_plat.h>
 #include <asm/cacheflush.h>
+#include <asm/mach/map.h>
 #include "armada-370-xp.h"
 #include "coherency.h"
 #include "mvebu-soc-id.h"
@@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = {
 	.set_dma_mask		= arm_dma_set_mask,
 };
 
-static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
-				       unsigned long event, void *__dev)
+static int mvebu_hwcc_notifier(struct notifier_block *nb,
+			       unsigned long event, void *__dev)
 {
 	struct device *dev = __dev;
 
@@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block mvebu_hwcc_platform_nb = {
-	.notifier_call = mvebu_hwcc_platform_notifier,
+static struct notifier_block mvebu_hwcc_nb = {
+	.notifier_call = mvebu_hwcc_notifier,
 };
 
 static void __init armada_370_coherency_init(struct device_node *np)
@@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np)
 	set_cpu_coherent();
 }
 
+/*
+ * This ioremap hook is used on Armada 375/38x to ensure that PCIe
+ * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
+ * is needed as a workaround for a deadlock issue between the PCIe
+ * interface and the cache controller.
+ */
+static void __iomem *
+armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+			      unsigned int mtype, void *caller)
+{
+	struct resource pcie_mem;
+
+	mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
+
+	if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
+		mtype = MT_UNCACHED;
+
+	return __arm_ioremap_caller(phys_addr, size, mtype, caller);
+}
+
 static void __init armada_375_380_coherency_init(struct device_node *np)
 {
+	struct device_node *cache_dn;
+
 	coherency_cpu_base = of_iomap(np, 0);
+	arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
+
+	/*
+	 * Add the PL310 property "arm,io-coherent". This makes sure the
+	 * outer sync operation is not used, which makes it possible to
+	 * work around the system erratum that causes deadlocks when
+	 * doing PCIe in an SMP situation on Armada 375 and Armada
+	 * 38x.
+	 */
+	for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
+		struct property *p;
+
+		p = kzalloc(sizeof(*p), GFP_KERNEL);
+		p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
+		of_add_property(cache_dn, p);
+	}
 }
 
 static int coherency_type(void)
@@ -375,9 +415,21 @@ static int __init coherency_late_init(void)
 	}
 
 	bus_register_notifier(&platform_bus_type,
-			      &mvebu_hwcc_platform_nb);
+			      &mvebu_hwcc_nb);
 
 	return 0;
 }
 
 postcore_initcall(coherency_late_init);
+
+#if IS_ENABLED(CONFIG_PCI)
+static int __init coherency_pci_init(void)
+{
+	if (coherency_available())
+		bus_register_notifier(&pci_bus_type,
+				       &mvebu_hwcc_nb);
+	return 0;
+}
+
+arch_initcall(coherency_pci_init);
+#endif

+ 49 - 28
arch/arm/mach-mvebu/coherency_ll.S

@@ -24,52 +24,69 @@
 #include <asm/cp15.h>
 
 	.text
-/* Returns with the coherency address in r1 (r0 is untouched)*/
+/* Returns the coherency base address in r1 (r0 is untouched) */
 ENTRY(ll_get_coherency_base)
 	mrc	p15, 0, r1, c1, c0, 0
 	tst	r1, #CR_M @ Check MMU bit enabled
 	bne	1f
 
-	/* use physical address of the coherency register */
+	/*
+	 * MMU is disabled, use the physical address of the coherency
+	 * base address.
+	 */
 	adr	r1, 3f
 	ldr	r3, [r1]
 	ldr	r1, [r1, r3]
 	b	2f
 1:
-	/* use virtual address of the coherency register */
+	/*
+	 * MMU is enabled, use the virtual address of the coherency
+	 * base address.
+	 */
 	ldr	r1, =coherency_base
 	ldr	r1, [r1]
 2:
 	mov	pc, lr
 ENDPROC(ll_get_coherency_base)
 
-/* Returns with the CPU ID in r3 (r0 is untouched)*/
-ENTRY(ll_get_cpuid)
+/*
+ * Returns the coherency CPU mask in r3 (r0 is untouched). This
+ * coherency CPU mask can be used with the coherency fabric
+ * configuration and control registers. Note that the mask is already
+ * endian-swapped as appropriate so that the calling functions do not
+ * have to care about endianness issues while accessing the coherency
+ * fabric registers.
+ */
+ENTRY(ll_get_coherency_cpumask)
 	mrc	15, 0, r3, cr0, cr0, 5
 	and	r3, r3, #15
 	mov	r2, #(1 << 24)
 	lsl	r3, r2, r3
-ARM_BE8(rev	r1, r1)
+ARM_BE8(rev	r3, r3)
 	mov	pc, lr
-ENDPROC(ll_get_cpuid)
+ENDPROC(ll_get_coherency_cpumask)
 
-/* ll_add_cpu_to_smp_group, ll_enable_coherency and
- * ll_disable_coherency use strex/ldrex whereas MMU can be off. The
- * Armada XP SoC has an exclusive monitor that can track transactions
- * to Device and/or SO and as such also when MMU is disabled the
- * exclusive transactions will be functional
+/*
+ * ll_add_cpu_to_smp_group(), ll_enable_coherency() and
+ * ll_disable_coherency() use the strex/ldrex instructions while the
+ * MMU can be disabled. The Armada XP SoC has an exclusive monitor
+ * that tracks transactions to Device and/or SO memory and thanks to
+ * that, exclusive transactions are functional even when the MMU is
+ * disabled.
  */
 
 ENTRY(ll_add_cpu_to_smp_group)
 	/*
-	 * r0 being untouched in ll_get_coherency_base and
-	 * ll_get_cpuid, we can use it to save lr modifing it with the
-	 * following bl
+	 * As r0 is not modified by ll_get_coherency_base() and
+	 * ll_get_coherency_cpumask(), we use it to temporarily save lr
+	 * and avoid it being modified by the branch and link
+	 * calls. This function is used very early in the secondary
+	 * CPU boot, and no stack is available at this point.
 	 */
-	mov r0, lr
+	mov 	r0, lr
 	bl	ll_get_coherency_base
-	bl	ll_get_cpuid
-	mov lr, r0
+	bl	ll_get_coherency_cpumask
+	mov 	lr, r0
 	add	r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
 1:
 	ldrex	r2, [r0]
@@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group)
 
 ENTRY(ll_enable_coherency)
 	/*
-	 * r0 being untouched in ll_get_coherency_base and
-	 * ll_get_cpuid, we can use it to save lr modifing it with the
-	 * following bl
+	 * As r0 is not modified by ll_get_coherency_base() and
+	 * ll_get_coherency_cpumask(), we use it to temporarily save lr
+	 * and avoid it being modified by the branch and link
+	 * calls. This function is used very early in the secondary
+	 * CPU boot, and no stack is available at this point.
 	 */
 	mov r0, lr
 	bl	ll_get_coherency_base
-	bl	ll_get_cpuid
+	bl	ll_get_coherency_cpumask
 	mov lr, r0
 	add	r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
 1:
@@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency)
 
 ENTRY(ll_disable_coherency)
 	/*
-	 * r0 being untouched in ll_get_coherency_base and
-	 * ll_get_cpuid, we can use it to save lr modifing it with the
-	 * following bl
+	 * As r0 is not modified by ll_get_coherency_base() and
+	 * ll_get_coherency_cpumask(), we use it to temporarly save lr
+	 * and avoid it being modified by the branch and link
+	 * calls. This function is used very early in the secondary
+	 * CPU boot, and no stack is available at this point.
 	 */
-	mov r0, lr
+	mov 	r0, lr
 	bl	ll_get_coherency_base
-	bl	ll_get_cpuid
-	mov lr, r0
+	bl	ll_get_coherency_cpumask
+	mov 	lr, r0
 	add	r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
 1:
 	ldrex	r2, [r0]