
Merge branch 'rpmsg-ti-linux-4.19.y' of git://git.ti.com/rpmsg/rpmsg into rpmsg-ti-linux-4.19.y-intg

Signed-off-by: Suman Anna <s-anna@ti.com>
Suman Anna, 6 years ago
parent
commit
7e2760dcfc

+ 70 - 0
arch/arm/boot/dts/dra7-ipu-common-early-boot.dtsi

@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * Common dtsi file that needs to be included in corresponding TI DRA7xx
+ * and AM57xx board dts files that have the IPU1 _and_ IPU2 remote processors
+ * booted early from TI U-Boot/SPL.
+ */
+
+/ {
+	reserved-memory {
+		mmu-early-page-tables@95700000 {
+			/* address needs to match the usage within U-Boot */
+			reg = <0x0 0x95700000 0x0 0x100000>;
+			no-map;
+		};
+	};
+};
+
+/* IPU2 */
+&timer3 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&timer4 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&timer9 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&mmu_ipu2 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&ipu2 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+/* IPU1 */
+&timer11 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&timer7 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&timer8 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&mmu_ipu1 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};
+
+&ipu1 {
+	ti,no-idle-on-init;
+	ti,no-reset-on-init;
+};

+ 6 - 6
arch/arm/boot/dts/dra72-evm-revc.dts

@@ -22,21 +22,21 @@
 		#size-cells = <2>;
 		ranges;
 
-		ipu2_cma_pool: ipu2_cma@95800000 {
+		ipu2_memory_region: ipu2-memory@95800000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x95800000 0x0 0x3800000>;
 			reusable;
 			status = "okay";
 		};
 
-		dsp1_cma_pool: dsp1_cma@99000000 {
+		dsp1_memory_region: dsp1-memory@99000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x99000000 0x0 0x4000000>;
 			reusable;
 			status = "okay";
 		};
 
-		ipu1_cma_pool: ipu1_cma@9d000000 {
+		ipu1_memory_region: ipu1-memory@9d000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x9d000000 0x0 0x2000000>;
 			reusable;
@@ -151,15 +151,15 @@
 
 &ipu2 {
 	status = "okay";
-	memory-region = <&ipu2_cma_pool>;
+	memory-region = <&ipu2_memory_region>;
 };
 
 &ipu1 {
 	status = "okay";
-	memory-region = <&ipu1_cma_pool>;
+	memory-region = <&ipu1_memory_region>;
 };
 
 &dsp1 {
 	status = "okay";
-	memory-region = <&dsp1_cma_pool>;
+	memory-region = <&dsp1_memory_region>;
 };

+ 8 - 8
arch/arm/boot/dts/dra76-evm.dts

@@ -33,28 +33,28 @@
 		#size-cells = <2>;
 		ranges;
 
-		ipu2_cma_pool: ipu2_cma@95800000 {
+		ipu2_memory_region: ipu2-memory@95800000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x95800000 0x0 0x3800000>;
 			reusable;
 			status = "okay";
 		};
 
-		dsp1_cma_pool: dsp1_cma@99000000 {
+		dsp1_memory_region: dsp1-memory@99000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x99000000 0x0 0x4000000>;
 			reusable;
 			status = "okay";
 		};
 
-		ipu1_cma_pool: ipu1_cma@9d000000 {
+		ipu1_memory_region: ipu1-memory@9d000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x9d000000 0x0 0x2000000>;
 			reusable;
 			status = "okay";
 		};
 
-		dsp2_cma_pool: dsp2_cma@9f000000 {
+		dsp2_memory_region: dsp2-memory@9f000000 {
 			compatible = "shared-dma-pool";
 			reg = <0x0 0x9f000000 0x0 0x800000>;
 			reusable;
@@ -584,20 +584,20 @@
 
 &ipu2 {
 	status = "okay";
-	memory-region = <&ipu2_cma_pool>;
+	memory-region = <&ipu2_memory_region>;
 };
 
 &ipu1 {
 	status = "okay";
-	memory-region = <&ipu1_cma_pool>;
+	memory-region = <&ipu1_memory_region>;
 };
 
 &dsp1 {
 	status = "okay";
-	memory-region = <&dsp1_cma_pool>;
+	memory-region = <&dsp1_memory_region>;
 };
 
 &dsp2 {
 	status = "okay";
-	memory-region = <&dsp2_cma_pool>;
+	memory-region = <&dsp2_memory_region>;
 };

+ 1 - 0
arch/arm/include/asm/dma-mapping.h

@@ -14,6 +14,7 @@
 #include <asm/xen/hypervisor.h>
 
 extern const struct dma_map_ops arm_dma_ops;
+extern const struct dma_map_ops arm_dma_m_ops;
 extern const struct dma_map_ops arm_coherent_dma_ops;
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)

+ 14 - 0
arch/arm/mach-omap2/pdata-quirks.c

@@ -58,7 +58,18 @@ static struct gfx_sgx_platform_data sgx_pdata = {
 };
 #endif
 
+static bool __maybe_unused omap_device_is_enabled(struct platform_device *pdev)
+{
+	struct omap_device *od = to_omap_device(pdev);
+
+	if (od->_state == OMAP_DEVICE_STATE_ENABLED)
+		return true;
+	else
+		return false;
+}
+
 #if IS_ENABLED(CONFIG_OMAP_IOMMU)
+
 int omap_iommu_set_pwrdm_constraint(struct platform_device *pdev, bool request,
 				    u8 *pwrst);
 #else
@@ -445,6 +456,7 @@ static void __init omap3_pandora_legacy_init(void)
 static struct omap_rproc_pdata omap4_ipu_dsp_pdata = {
 	.device_enable = omap_rproc_device_enable,
 	.device_shutdown = omap_rproc_device_shutdown,
+	.device_is_enabled = omap_device_is_enabled,
 };
 #endif
 
@@ -456,6 +468,7 @@ static struct iommu_platform_data omap4_iommu_pdata = {
 	.deassert_reset = omap_device_deassert_hardreset,
 	.device_enable = omap_device_enable,
 	.device_idle = omap_device_idle,
+	.device_is_enabled = omap_device_is_enabled,
 };
 #endif
 
@@ -485,6 +498,7 @@ static struct iommu_platform_data dra7_ipu1_dsp_iommu_pdata = {
 	.assert_reset = omap_device_assert_hardreset,
 	.deassert_reset = omap_device_deassert_hardreset,
 	.device_enable = omap_device_enable,
+	.device_is_enabled = omap_device_is_enabled,
 	.device_idle = omap_device_idle,
 	.set_pwrdm_constraint = omap_iommu_set_pwrdm_constraint,
 };

+ 45 - 8
arch/arm/mm/dma-mapping.c

@@ -50,6 +50,7 @@ struct arm_dma_alloc_args {
 	const void *caller;
 	bool want_vaddr;
 	int coherent_flag;
+	bool zero;
 };
 
 struct arm_dma_free_args {
@@ -203,6 +204,27 @@ const struct dma_map_ops arm_dma_ops = {
 };
 EXPORT_SYMBOL(arm_dma_ops);
 
+static void *arm_dma_malloc(struct device *dev, size_t size, dma_addr_t *handle,
+			    gfp_t gfp, unsigned long dma_attrs);
+
+const struct dma_map_ops arm_dma_m_ops = {
+	.alloc                  = arm_dma_malloc,
+	.free                   = arm_dma_free,
+	.mmap                   = arm_dma_mmap,
+	.get_sgtable            = arm_dma_get_sgtable,
+	.map_page               = arm_dma_map_page,
+	.unmap_page             = arm_dma_unmap_page,
+	.map_sg                 = arm_dma_map_sg,
+	.unmap_sg               = arm_dma_unmap_sg,
+	.sync_single_for_cpu    = arm_dma_sync_single_for_cpu,
+	.sync_single_for_device = arm_dma_sync_single_for_device,
+	.sync_sg_for_cpu        = arm_dma_sync_sg_for_cpu,
+	.sync_sg_for_device     = arm_dma_sync_sg_for_device,
+	.mapping_error		= arm_dma_mapping_error,
+	.dma_supported		= arm_dma_supported,
+};
+EXPORT_SYMBOL(arm_dma_m_ops);
+
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 	dma_addr_t *handle, gfp_t gfp, unsigned long attrs);
 static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -356,7 +378,7 @@ static void __dma_free_buffer(struct page *page, size_t size)
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
 				     const void *caller, bool want_vaddr,
-				     int coherent_flag, gfp_t gfp);
+				     int coherent_flag, gfp_t gfp, bool zero);
 
 static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
 				 pgprot_t prot, struct page **ret_page,
@@ -413,7 +435,7 @@ static int __init atomic_pool_init(void)
 	if (dev_get_cma_area(NULL))
 		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
 				      &page, atomic_pool_init, true, NORMAL,
-				      GFP_KERNEL);
+				      GFP_KERNEL, true);
 	else
 		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
 					   &page, atomic_pool_init, true);
@@ -587,7 +609,7 @@ static int __free_from_pool(void *start, size_t size)
 static void *__alloc_from_contiguous(struct device *dev, size_t size,
 				     pgprot_t prot, struct page **ret_page,
 				     const void *caller, bool want_vaddr,
-				     int coherent_flag, gfp_t gfp)
+				     int coherent_flag, gfp_t gfp, bool zero)
 {
 	unsigned long order = get_order(size);
 	size_t count = size >> PAGE_SHIFT;
@@ -598,7 +620,8 @@ static void *__alloc_from_contiguous(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	__dma_clear_buffer(page, size, coherent_flag);
+	if (zero)
+		__dma_clear_buffer(page, size, coherent_flag);
 
 	if (!want_vaddr)
 		goto out;
@@ -675,7 +698,7 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
 	return __alloc_from_contiguous(args->dev, args->size, args->prot,
 				       ret_page, args->caller,
 				       args->want_vaddr, args->coherent_flag,
-				       args->gfp);
+				       args->gfp, args->zero);
 }
 
 static void cma_allocator_free(struct arm_dma_free_args *args)
@@ -728,7 +751,7 @@ static struct arm_dma_allocator remap_allocator = {
 
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 			 gfp_t gfp, pgprot_t prot, bool is_coherent,
-			 unsigned long attrs, const void *caller)
+			 unsigned long attrs, const void *caller, bool zero)
 {
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
@@ -743,6 +766,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 		.caller = caller,
 		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
 		.coherent_flag = is_coherent ? COHERENT : NORMAL,
+		.zero = zero,
 	};
 
 #ifdef CONFIG_DMA_API_DEBUG
@@ -816,14 +840,27 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
 
 	return __dma_alloc(dev, size, handle, gfp, prot, false,
-			   attrs, __builtin_return_address(0));
+			   attrs, __builtin_return_address(0), true);
+}
+
+/*
+ * Same as arm_dma_alloc except don't zero memory on alloc
+ */
+void *arm_dma_malloc(struct device *dev, size_t size, dma_addr_t *handle,
+		     gfp_t gfp, unsigned long attrs)
+{
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
+
+	return __dma_alloc(dev, size, handle, gfp, prot, false,
+			   attrs, __builtin_return_address(0),
+			   false);
 }
 
 static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
 	dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
 {
 	return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
-			   attrs, __builtin_return_address(0));
+			   attrs, __builtin_return_address(0), true);
 }
 
 static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
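
The only functional difference between arm_dma_m_ops and arm_dma_ops is the .alloc hook: arm_dma_malloc() passes zero = false down to __dma_alloc(), so pages handed back from the CMA allocator are not cleared. A minimal caller-side sketch, assuming a hypothetical helper and an already-probed device (both illustrative, not part of this patch):

/* sketch only: "dev" is assumed to be a device whose buffers were pre-loaded */
#include <linux/dma-mapping.h>
#include <asm/dma-mapping.h>

static void *alloc_prebooted_carveout(struct device *dev, size_t len,
				      dma_addr_t *dma)
{
	/* switch to the non-zeroing ops so bootloader-loaded contents survive */
	set_dma_ops(dev, &arm_dma_m_ops);

	/*
	 * same path as a regular dma_alloc_coherent(), minus the
	 * __dma_clear_buffer() memset in the CMA allocator
	 */
	return dma_alloc_coherent(dev, len, dma, GFP_KERNEL);
}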

+ 77 - 5
drivers/clocksource/timer-ti-dm.c

@@ -94,6 +94,13 @@ static void omap_dm_timer_write_reg(struct omap_dm_timer *timer, u32 reg,
 
 static void omap_timer_restore_context(struct omap_dm_timer *timer)
 {
+	/*
+	 * Do not restore the context during late attach. Kernel data
+	 * structure is not in sync with the register settings of the timer.
+	 */
+	if (timer->late_attach)
+		return;
+
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_WAKEUP_EN_REG,
 				timer->context.twer);
 	omap_dm_timer_write_reg(timer, OMAP_TIMER_COUNTER_REG,
@@ -194,6 +201,20 @@ static int omap_dm_timer_set_source(struct omap_dm_timer *timer, int source)
 	return ret;
 }
 
+static int omap_dm_timer_is_enabled(struct omap_dm_timer *timer)
+{
+	u32 val;
+
+	val = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
+
+	/* Check if timer ST bit is set or the Counter register is loaded */
+	if (val & OMAP_TIMER_CTRL_ST ||
+	    omap_dm_timer_read_reg(timer, OMAP_TIMER_COUNTER_REG))
+		return 1;
+	else
+		return 0;
+}
+
 static void omap_dm_timer_enable(struct omap_dm_timer *timer)
 {
 	int c;
@@ -247,6 +268,14 @@ static int omap_dm_timer_prepare(struct omap_dm_timer *timer)
 	__omap_dm_timer_enable_posted(timer);
 	omap_dm_timer_disable(timer);
 
+	/*
+	 * During late attach, do not set the timer source during prepare
+	 * as the timer might be clocked from a different source. It will
+	 * be set properly from remoteproc.
+	 */
+	if (timer->late_attach)
+		return 0;
+
 	rc = omap_dm_timer_set_source(timer, OMAP_TIMER_SRC_32_KHZ);
 
 	return rc;
@@ -503,6 +532,16 @@ static int omap_dm_timer_start(struct omap_dm_timer *timer)
 
 	/* Save the context */
 	timer->context.tclr = l;
+
+	/*
+	 * Now that the timer has been started, call pm_runtime_put_noidle to
+	 * balance the pm_runtime device usage count to the same value as in
+	 * the regular case, and reset the late_attach flag.
+	 */
+	if (timer->late_attach)
+		pm_runtime_put_noidle(&timer->pdev->dev);
+	timer->late_attach = 0;
+
 	return 0;
 }
 
@@ -543,10 +582,18 @@ static int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload,
 		l |= OMAP_TIMER_CTRL_AR;
 	else
 		l &= ~OMAP_TIMER_CTRL_AR;
-	omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
-	omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
 
-	omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
+	/*
+	 * If late attach is enabled, do not modify the dmtimer registers.
+	 * The registers would have been configured already.
+	 */
+	if (!timer->late_attach) {
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_CTRL_REG, l);
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+
+		omap_dm_timer_write_reg(timer, OMAP_TIMER_TRIGGER_REG, 0);
+	}
+
 	/* Save the context */
 	timer->context.tclr = l;
 	timer->context.tldr = load;
@@ -568,13 +615,21 @@ int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, int autoreload,
 	l = omap_dm_timer_read_reg(timer, OMAP_TIMER_CTRL_REG);
 	if (autoreload) {
 		l |= OMAP_TIMER_CTRL_AR;
-		omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG, load);
+		/*
+		 * If late attach is enabled, do not modify the dmtimer
+		 * registers. The registers would have been configured
+		 * already.
+		 */
+		if (!timer->late_attach)
+			omap_dm_timer_write_reg(timer, OMAP_TIMER_LOAD_REG,
+						load);
 	} else {
 		l &= ~OMAP_TIMER_CTRL_AR;
 	}
 	l |= OMAP_TIMER_CTRL_ST;
 
-	__omap_dm_timer_load_start(timer, l, load, timer->posted);
+	if (!timer->late_attach)
+		__omap_dm_timer_load_start(timer, l, load, timer->posted);
 
 	/* Save the context */
 	timer->context.tclr = l;
@@ -847,6 +902,16 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
 			goto err_get_sync;
 		}
 		__omap_dm_timer_init_regs(timer);
+
+		if (omap_dm_timer_is_enabled(timer))
+			timer->late_attach = 1;
+		/*
+		 * Increase the pm_runtime usage count and prevent kernel power
+		 * management from idling or disabling the timer.
+		 */
+		if (timer->late_attach)
+			pm_runtime_get_noresume(dev);
+
 		pm_runtime_put(dev);
 	}
 
@@ -884,6 +949,12 @@ static int omap_dm_timer_remove(struct platform_device *pdev)
 		if (!strcmp(dev_name(&timer->pdev->dev),
 			    dev_name(&pdev->dev))) {
 			list_del(&timer->node);
+			/*
+			 * Reset device usage counter if late_attach is still
+			 * set
+			 */
+			if (timer->late_attach)
+				pm_runtime_put_noidle(&timer->pdev->dev);
 			ret = 0;
 			break;
 		}
@@ -905,6 +976,7 @@ const static struct omap_dm_timer_ops dmtimer_ops = {
 	.free = omap_dm_timer_free,
 	.enable = omap_dm_timer_enable,
 	.disable = omap_dm_timer_disable,
+	.is_enabled = omap_dm_timer_is_enabled,
 	.get_fclk = omap_dm_timer_get_fclk,
 	.start = omap_dm_timer_start,
 	.stop = omap_dm_timer_stop,
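
The pm_runtime handling above is a get/put bracket: probe takes an extra usage count with pm_runtime_get_noresume() so an already-running timer is never runtime-suspended behind the remote processor's back, and omap_dm_timer_start() drops that count with pm_runtime_put_noidle() once the kernel formally owns the timer. A condensed sketch of the bracket; the struct and function names are illustrative stand-ins, not the driver's own:

#include <linux/device.h>
#include <linux/pm_runtime.h>

struct prebooted_timer {
	struct device *dev;
	u32 late_attach;
};

/* probe side: hardware was left running by the bootloader */
static void prebooted_timer_hold(struct prebooted_timer *t)
{
	if (t->late_attach)
		pm_runtime_get_noresume(t->dev);	/* block runtime suspend */
}

/* start side: the kernel takes over, so rebalance the usage count */
static void prebooted_timer_release(struct prebooted_timer *t)
{
	if (t->late_attach)
		pm_runtime_put_noidle(t->dev);		/* drop the extra count */
	t->late_attach = 0;
}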

+ 1 - 1
drivers/iommu/omap-iommu-debug.c

@@ -197,7 +197,7 @@ static void dump_ioptable(struct seq_file *s)
 			continue;
 		}
 
-		iopte = iopte_offset(iopgd, 0);
+		iopte = iopte_get(obj, iopgd, 0);
 		for (j = 0; j < PTRS_PER_IOPTE; j++, iopte++) {
 			if (!*iopte)
 				continue;

+ 77 - 12
drivers/iommu/omap-iommu.c

@@ -44,6 +44,12 @@ static const struct iommu_ops omap_iommu_ops;
 /* bitmap of the page sizes currently supported */
 #define OMAP_IOMMU_PGSIZES	(SZ_4K | SZ_64K | SZ_1M | SZ_16M)
 
+/*
+ * total size of L1 and L2 page tables reserved/used by the bootloader per
+ * rproc for early boot use cases; must match the value used in the bootloader
+ */
+#define EARLY_PAGE_TABLES_SIZE	SZ_256K
+
 #define MMU_LOCK_BASE_SHIFT	10
 #define MMU_LOCK_BASE_MASK	(0x1f << MMU_LOCK_BASE_SHIFT)
 #define MMU_LOCK_BASE(x)	\
@@ -163,7 +169,7 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 	if (!obj->iopgd || !IS_ALIGNED((u32)obj->iopgd,  SZ_16K))
 		return -EINVAL;
 
-	pa = virt_to_phys(obj->iopgd);
+	pa = obj->iopgd_pa;
 	if (!IS_ALIGNED(pa, SZ_16K))
 		return -EINVAL;
 
@@ -198,6 +204,15 @@ static int iommu_enable(struct omap_iommu *obj)
 {
 	int ret;
 
+	/*
+	 * now that the threat of idling has passed, decrement the
+	 * device usage count to balance the increment done in probe;
+	 * the pm runtime device usage count will be managed normally
+	 * from here on
+	 */
+	if (obj->late_attach)
+		pm_runtime_put_noidle(obj->dev);
+
 	ret = pm_runtime_get_sync(obj->dev);
 	if (ret < 0)
 		pm_runtime_put_noidle(obj->dev);
@@ -537,7 +552,7 @@ static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd,
 	}
 
 pte_ready:
-	iopte = iopte_offset(iopgd, da);
+	iopte = iopte_get(obj, iopgd, da);
 	*pt_dma = iopgd_page_paddr(iopgd);
 	dev_vdbg(obj->dev,
 		 "%s: da:%08x pgd:%p *pgd:%08x pte:%p *pte:%08x\n",
@@ -696,7 +711,7 @@ iopgtable_lookup_entry(struct omap_iommu *obj, u32 da, u32 **ppgd, u32 **ppte)
 		goto out;
 
 	if (iopgd_is_table(*iopgd))
-		iopte = iopte_offset(iopgd, da);
+		iopte = iopte_get(obj, iopgd, da);
 out:
 	*ppgd = iopgd;
 	*ppte = iopte;
@@ -716,13 +731,13 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da)
 
 	if (iopgd_is_table(*iopgd)) {
 		int i;
-		u32 *iopte = iopte_offset(iopgd, da);
+		u32 *iopte = iopte_get(obj, iopgd, da);
 
 		bytes = IOPTE_SIZE;
 		if (*iopte & IOPTE_LARGE) {
 			nent *= 16;
 			/* rewind to the 1st entry */
-			iopte = iopte_offset(iopgd, (da & IOLARGE_MASK));
+			iopte = iopte_get(obj, iopgd, (da & IOLARGE_MASK));
 		}
 		bytes *= nent;
 		memset(iopte, 0, nent * sizeof(*iopte));
@@ -732,7 +747,8 @@ static size_t iopgtable_clear_entry_core(struct omap_iommu *obj, u32 da)
 		/*
 		 * do table walk to check if this table is necessary or not
 		 */
-		iopte = iopte_offset(iopgd, 0);
+		iopte = iopte_get(obj, iopgd, 0);
+
 		for (i = 0; i < PTRS_PER_IOPTE; i++)
 			if (iopte[i])
 				goto out;
@@ -791,8 +807,15 @@ static void iopgtable_clear_entry_all(struct omap_iommu *obj)
 		if (!*iopgd)
 			continue;
 
-		if (iopgd_is_table(*iopgd))
-			iopte_free(obj, iopte_offset(iopgd, 0), true);
+		if (iopgd_is_table(*iopgd)) {
+			if (obj->late_attach)
+				iopte_free(obj, iopte_offset_lateattach(obj,
+									iopgd,
+									0),
+					   true);
+			else
+				iopte_free(obj, iopte_offset(iopgd, 0), true);
+		}
 
 		*iopgd = 0;
 		flush_iopte_range(obj->dev, obj->pd_dma, offset, 1);
@@ -835,7 +858,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 		return IRQ_NONE;
 	}
 
-	iopte = iopte_offset(iopgd, da);
+	iopte = iopte_get(obj, iopgd, da);
 
 	dev_err(obj->dev, "%s: errs:0x%08x da:0x%08x pgd:0x%p *pgd:0x%08x pte:0x%p *pte:0x%08x\n",
 		obj->name, errs, da, iopgd, *iopgd, iopte, *iopte);
@@ -851,6 +874,16 @@ static irqreturn_t iommu_fault_handler(int irq, void *data)
 static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd)
 {
 	int err;
+	u32 iopgd_pa;
+
+	if (obj->late_attach) {
+		iopgd_pa = iommu_read_reg(obj, MMU_TTB);
+		iopgd = ioremap(iopgd_pa, EARLY_PAGE_TABLES_SIZE);
+		if (!iopgd)
+			return -ENOMEM;
+	} else {
+		iopgd_pa = virt_to_phys(iopgd);
+	}
 
 	spin_lock(&obj->iommu_lock);
 
@@ -862,11 +895,14 @@ static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd)
 		goto out_err;
 	}
 
+	obj->iopgd_pa = iopgd_pa;
 	obj->iopgd = iopgd;
 	err = iommu_enable(obj);
 	if (err)
 		goto out_err;
-	flush_iotlb_all(obj);
+
+	if (!obj->late_attach)
+		flush_iotlb_all(obj);
 
 	spin_unlock(&obj->iommu_lock);
 
@@ -889,13 +925,19 @@ static void omap_iommu_detach(struct omap_iommu *obj)
 	if (!obj || IS_ERR(obj))
 		return;
 
+	if (obj->late_attach && obj->iopgd)
+		iounmap(obj->iopgd);
+
 	spin_lock(&obj->iommu_lock);
 
 	dma_unmap_single(obj->dev, obj->pd_dma, IOPGD_TABLE_SIZE,
 			 DMA_TO_DEVICE);
 	obj->pd_dma = 0;
+
+	obj->iopgd_pa = 0;
 	obj->iopgd = NULL;
 	iommu_disable(obj);
+	obj->late_attach = 0;
 
 	spin_unlock(&obj->iommu_lock);
 
@@ -1069,7 +1111,9 @@ static int omap_iommu_runtime_resume(struct device *dev)
 		}
 	}
 
-	if (pdata && pdata->deassert_reset) {
+	/* for late attach, skip deasserting reset only during the initial boot */
+	if ((!obj->late_attach || obj->domain) &&
+	    pdata && pdata->deassert_reset) {
 		ret = pdata->deassert_reset(pdev, pdata->reset_name);
 		if (ret) {
 			dev_err(dev, "deassert_reset failed: %d\n", ret);
@@ -1170,6 +1214,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	struct omap_iommu *obj;
 	struct resource *res;
 	struct device_node *of = pdev->dev.of_node;
+	struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev);
 
 	if (!of) {
 		pr_err("%s: only DT-based devices are supported\n", __func__);
@@ -1192,6 +1237,7 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	obj->name = dev_name(&pdev->dev);
 	obj->nr_tlb_entries = 32;
 	err = of_property_read_u32(of, "ti,#tlb-entries", &obj->nr_tlb_entries);
+
 	if (err && err != -EINVAL)
 		return err;
 	if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8)
@@ -1199,6 +1245,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	if (of_find_property(of, "ti,iommu-bus-err-back", NULL))
 		obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN;
 
+	if (pdata && pdata->device_is_enabled &&
+	    pdata->device_is_enabled(pdev))
+		obj->late_attach = 1;
+
 	obj->dev = &pdev->dev;
 	obj->ctx = (void *)obj + sizeof(*obj);
 	obj->cr_ctx = devm_kzalloc(&pdev->dev,
@@ -1247,6 +1297,15 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	}
 
 	pm_runtime_irq_safe(obj->dev);
+
+	/*
+	 * increment the device usage count so that runtime_suspend is not
+	 * invoked immediately after the probe (due to the ti,no-idle-on-init)
+	 * and before any remoteproc has attached to the iommu
+	 */
+	if (obj->late_attach)
+		pm_runtime_get_noresume(obj->dev);
+
 	pm_runtime_enable(obj->dev);
 
 	omap_iommu_debugfs_add(obj);
@@ -1428,6 +1487,11 @@ static int omap_iommu_attach_init(struct device *dev,
 
 	iommu = odomain->iommus;
 	for (i = 0; i < odomain->num_iommus; i++, iommu++) {
+		/*
+		 * Not necessary for late attach, as the page table is set up
+		 * by the bootloader. The code below is left in place since it
+		 * has no side effects during late attach.
+		 */
 		iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC);
 		if (!iommu->pgtable)
 			return -ENOMEM;
@@ -1549,7 +1613,8 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
 	arch_data += (omap_domain->num_iommus - 1);
 	for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) {
 		oiommu = iommu->iommu_dev;
-		iopgtable_clear_entry_all(oiommu);
+		if (!oiommu->late_attach)
+			iopgtable_clear_entry_all(oiommu);
 
 		omap_iommu_detach(oiommu);
 		iommu->iommu_dev = NULL;
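
In the late-attach case, omap_iommu_attach() above adopts the bootloader's page table instead of programming a new one: the L1 base is read back from MMU_TTB, the whole reserved table region is ioremap()ed, and flush_iotlb_all() is skipped so the entries installed by U-Boot stay live. A stripped-down sketch of that flow, reusing the driver-internal helpers (iommu_read_reg(), MMU_TTB, EARLY_PAGE_TABLES_SIZE) and omitting locking and the non-late-attach branch:

#include <linux/io.h>

static int adopt_bootloader_pgtable(struct omap_iommu *obj)
{
	u32 pa = iommu_read_reg(obj, MMU_TTB);		/* bootloader's L1 base */
	u32 *pgd = ioremap(pa, EARLY_PAGE_TABLES_SIZE);	/* covers L1 + L2 tables */

	if (!pgd)
		return -ENOMEM;

	obj->iopgd_pa = pa;
	obj->iopgd = pgd;
	/* no flush_iotlb_all(): keep the TLB entries the bootloader loaded */
	return 0;
}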

+ 10 - 0
drivers/iommu/omap-iommu.h

@@ -69,6 +69,8 @@ struct omap_iommu {
 	 * but share it globally for each iommu.
 	 */
 	u32		*iopgd;
+	u32		iopgd_pa;
+	u32		late_attach;
 	spinlock_t	page_table_lock; /* protect iopgd */
 	dma_addr_t	pd_dma;
 
@@ -272,4 +274,12 @@ static inline int iotlb_cr_valid(struct cr_regs *cr)
 	return cr->cam & MMU_CAM_V;
 }
 
+static inline u32 *iopte_get(struct omap_iommu *obj, u32 *iopgd, u32 da)
+{
+	if (obj->late_attach)
+		return iopte_offset_lateattach(obj, iopgd, da);
+	else
+		return iopte_offset(iopgd, da);
+}
+
 #endif /* _OMAP_IOMMU_H */

+ 12 - 0
drivers/iommu/omap-iopgtable.h

@@ -99,4 +99,16 @@ static inline phys_addr_t omap_iommu_translate(u32 d, u32 va, u32 mask)
 #define iopte_index(da)		(((da) >> IOPTE_SHIFT) & (PTRS_PER_IOPTE - 1))
 #define iopte_offset(iopgd, da)	(iopgd_page_vaddr(iopgd) + iopte_index(da))
 
+/*
+ * compute vaddr for second-level page table relative to page table directory
+ * for late-attach mode
+ */
+#define iopgd_page_vaddr_lateattach(obj, pgd)				\
+	((u32 *)((u32 *)((obj)->iopgd)) +				\
+	((u32 *)iopgd_page_paddr((pgd)) - (u32 *)((obj)->iopgd_pa)))
+
+/* to find an entry in the second-level page table for late-attach mode */
+#define iopte_offset_lateattach(obj, iopgd, da)				\
+	(iopgd_page_vaddr_lateattach(obj, iopgd) + iopte_index(da))
+
 #endif /* _OMAP_IOPGTABLE_H */
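
The lateattach macros rely on the bootloader keeping the L1 directory and all L2 tables in one contiguous block: an L2 table's kernel virtual address is the ioremapped base plus the same offset its physical address has from iopgd_pa. An equivalent open-coded form, added here only to make the pointer arithmetic explicit (the macro's u32 * arithmetic and this byte-offset form yield the same address):

static inline u32 *l2_table_vaddr(struct omap_iommu *obj, u32 *iopgd)
{
	u32 off = iopgd_page_paddr(iopgd) - obj->iopgd_pa;	/* byte offset */

	return (u32 *)((unsigned long)obj->iopgd + off);
}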

+ 27 - 4
drivers/remoteproc/omap_remoteproc.c

@@ -573,6 +573,10 @@ static int omap_rproc_start(struct rproc *rproc)
 	int ret;
 	struct mbox_client *client = &oproc->client;
 
+	/*
+	 * We set the boot address irrespective of the late attach flag, as it
+	 * takes effect only when the remoteproc reset is deasserted.
+	 */
 	if (oproc->boot_data) {
 		ret = omap_rproc_write_dsp_boot_addr(rproc);
 		if (ret)
@@ -612,10 +616,12 @@ static int omap_rproc_start(struct rproc *rproc)
 		goto put_mbox;
 	}
 
-	ret = pdata->device_enable(pdev);
-	if (ret) {
-		dev_err(dev, "omap_device_enable failed: %d\n", ret);
-		goto reset_timers;
+	if (!rproc->late_attach) {
+		ret = pdata->device_enable(pdev);
+		if (ret) {
+			dev_err(dev, "omap_device_enable failed: %d\n", ret);
+			goto reset_timers;
+		}
 	}
 
 	/*
@@ -671,6 +677,16 @@ static int omap_rproc_stop(struct rproc *rproc)
 	if (ret)
 		goto enable_device;
 
+	/*
+	 * During late attach, we use non-zeroing dma ops to prevent the kernel
+	 * from overwriting already loaded code and data segments. When
+	 * shutting down the processor, we restore the normal zeroing dma ops.
+	 * This allows the kernel to clear memory when loading a new remoteproc
+	 * binary or during error recovery with the current remoteproc binary.
+	 */
+	if (rproc->late_attach)
+		set_dma_ops(dev, &arm_dma_ops);
+
 	mbox_free_channel(oproc->mbox);
 
 	/*
@@ -1310,6 +1326,11 @@ static int omap_rproc_probe(struct platform_device *pdev)
 	if (!rproc)
 		return -ENOMEM;
 
+	if (pdata->device_is_enabled && pdata->device_is_enabled(pdev)) {
+		rproc->late_attach = 1;
+		set_dma_ops(&pdev->dev, &arm_dma_m_ops);
+	}
+
 	oproc = rproc->priv;
 	oproc->rproc = rproc;
 	/* All existing OMAP IPU and DSP processors have an MMU */
@@ -1398,6 +1419,8 @@ static int omap_rproc_probe(struct platform_device *pdev)
 release_mem:
 	of_reserved_mem_device_release(&pdev->dev);
 free_rproc:
+	if (rproc->late_attach)
+		set_dma_ops(&pdev->dev, &arm_dma_ops);
 	rproc_free(rproc);
 	return ret;
 }

+ 46 - 20
drivers/remoteproc/remoteproc_core.c

@@ -742,10 +742,13 @@ static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc,
 	if (!mapping)
 		return -ENOMEM;
 
-	ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len, rsc->flags);
-	if (ret) {
-		dev_err(dev, "failed to map devmem: %d\n", ret);
-		goto out;
+	if (!rproc->late_attach) {
+		ret = iommu_map(rproc->domain, rsc->da, rsc->pa, rsc->len,
+				rsc->flags);
+		if (ret) {
+			dev_err(dev, "failed to map devmem: %d\n", ret);
+			goto out;
+		}
 	}
 
 	/*
@@ -760,8 +763,12 @@ static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc,
 	mapping->len = rsc->len;
 	list_add_tail(&mapping->node, &rproc->mappings);
 
-	dev_dbg(dev, "mapped devmem pa 0x%x, da 0x%x, len 0x%x\n",
-		rsc->pa, rsc->da, rsc->len);
+	if (!rproc->late_attach)
+		dev_dbg(dev, "mapped devmem pa 0x%x, da 0x%x, len 0x%x\n",
+			rsc->pa, rsc->da, rsc->len);
+	else
+		dev_dbg(dev, "late-attach: processed devmem pa 0x%x, da 0x%x, len 0x%x\n",
+			rsc->pa, rsc->da, rsc->len);
 
 	return 0;
 
@@ -817,7 +824,13 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	if (!carveout)
 		return -ENOMEM;
 
-	va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL);
+	if (rproc->late_attach) {
+		va = dma_malloc_coherent(dev->parent, rsc->len, &dma,
+					 GFP_KERNEL);
+	} else {
+		va = dma_alloc_coherent(dev->parent, rsc->len, &dma,
+					GFP_KERNEL);
+	}
 	if (!va) {
 		dev_err(dev->parent,
 			"failed to allocate dma memory: len 0x%x\n", rsc->len);
@@ -852,11 +865,13 @@ static int rproc_handle_carveout(struct rproc *rproc,
 			goto dma_free;
 		}
 
-		ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len,
-				rsc->flags);
-		if (ret) {
-			dev_err(dev, "iommu_map failed: %d\n", ret);
-			goto free_mapping;
+		if (!rproc->late_attach) {
+			ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len,
+					rsc->flags);
+			if (ret) {
+				dev_err(dev, "iommu_map failed: %d\n", ret);
+				goto free_mapping;
+			}
 		}
 
 		/*
@@ -870,8 +885,13 @@ static int rproc_handle_carveout(struct rproc *rproc,
 		mapping->len = rsc->len;
 		list_add_tail(&mapping->node, &rproc->mappings);
 
-		dev_dbg(dev, "carveout mapped 0x%x to %pad\n",
-			rsc->da, &dma);
+		if (!rproc->late_attach)
+			dev_dbg(dev, "carveout mapped 0x%x to %pad\n",
+				rsc->da, &dma);
+		else
+			dev_dbg(dev, "late-attach: carveout processed 0x%x to %pad\n",
+				rsc->da, &dma);
+
 	}
 
 	/*
@@ -1145,11 +1165,14 @@ static void rproc_resource_cleanup(struct rproc *rproc)
 	list_for_each_entry_safe(entry, tmp, &rproc->mappings, node) {
 		size_t unmapped;
 
-		unmapped = iommu_unmap(rproc->domain, entry->da, entry->len);
-		if (unmapped != entry->len) {
-			/* nothing much to do besides complaining */
-			dev_err(dev, "failed to unmap %u/%zu\n", entry->len,
-				unmapped);
+		if (!rproc->late_attach) {
+			unmapped = iommu_unmap(rproc->domain, entry->da,
+					       entry->len);
+			if (unmapped != entry->len) {
+				/* nothing much to do besides complaining */
+				dev_err(dev, "failed to unmap %u/%zu\n",
+					entry->len, unmapped);
+			}
 		}
 
 		list_del(&entry->node);
@@ -1228,7 +1251,7 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 		goto clean_up_resources;
 	}
 
-	if (!rproc->skip_load) {
+	if (!rproc->skip_load && !rproc->late_attach) {
 		/* load the ELF segments to memory */
 		ret = rproc_load_segments(rproc, fw);
 		if (ret) {
@@ -1236,6 +1259,8 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 				ret);
 			goto clean_up_resources;
 		}
+	} else {
+		dev_dbg(dev, "Skipped program segments load for pre-booted rproc\n");
 	}
 
 	/*
@@ -1687,6 +1712,7 @@ void rproc_shutdown(struct rproc *rproc)
 		complete_all(&rproc->crash_comp);
 
 	rproc->state = RPROC_OFFLINE;
+	rproc->late_attach = 0;
 
 	dev_info(dev, "stopped remote processor %s\n", rproc->name);
 

+ 1 - 0
include/clocksource/timer-ti-dm.h

@@ -116,6 +116,7 @@ struct omap_dm_timer {
 	u32 errata;
 	struct platform_device *pdev;
 	struct list_head node;
+	u32 late_attach;
 };
 
 int omap_dm_timer_reserve_systimer(int id);

+ 20 - 6
include/linux/dma-mapping.h

@@ -161,7 +161,8 @@ static inline int is_device_dma_capable(struct device *dev)
  * Don't use them in device drivers.
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
-				       dma_addr_t *dma_handle, void **ret);
+				dma_addr_t *dma_handle, void **ret,
+				bool zero);
 int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
 
 int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
@@ -173,7 +174,7 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
 				  size_t size, int *ret);
 
 #else
-#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
+#define dma_alloc_from_dev_coherent(dev, size, handle, ret, zero) (0)
 #define dma_release_from_dev_coherent(dev, order, vaddr) (0)
 #define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
 
@@ -505,9 +506,9 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
 #define arch_dma_alloc_attrs(dev)	(true)
 #endif
 
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
-				       dma_addr_t *dma_handle, gfp_t flag,
-				       unsigned long attrs)
+static inline void *dma_malloc_attrs(struct device *dev, size_t size,
+				     dma_addr_t *dma_handle, gfp_t flag,
+				     unsigned long attrs, bool zero)
 {
 	const struct dma_map_ops *ops = get_dma_ops(dev);
 	void *cpu_addr;
@@ -515,7 +516,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
 	BUG_ON(!ops);
 	WARN_ON_ONCE(dev && !dev->coherent_dma_mask);
 
-	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
+	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr, zero))
 		return cpu_addr;
 
 	/* let the implementation decide on the zone to allocate from: */
@@ -531,6 +532,13 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
 	return cpu_addr;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+				    dma_addr_t *dma_handle, gfp_t flag,
+				    unsigned long attrs)
+{
+	return dma_malloc_attrs(dev, size, dma_handle, flag, attrs, true);
+}
+
 static inline void dma_free_attrs(struct device *dev, size_t size,
 				     void *cpu_addr, dma_addr_t dma_handle,
 				     unsigned long attrs)
@@ -563,6 +571,12 @@ static inline void *dma_alloc_coherent(struct device *dev, size_t size,
 	return dma_alloc_attrs(dev, size, dma_handle, flag, 0);
 }
 
+static inline void *dma_malloc_coherent(struct device *dev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flag)
+{
+	return dma_malloc_attrs(dev, size, dma_handle, flag, 0, false);
+}
+
 static inline void dma_free_coherent(struct device *dev, size_t size,
 		void *cpu_addr, dma_addr_t dma_handle)
 {
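
dma_malloc_coherent() is a thin wrapper: it calls the same dma_malloc_attrs() as dma_alloc_coherent(), just with zero = false, so callers that know the buffer already holds valid data (here, remoteproc carveouts loaded by U-Boot) can skip the clearing memset. A minimal illustration of the two entry points; the function and the "dev" argument are hypothetical:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/sizes.h>

static void compare_alloc_paths(struct device *dev)
{
	dma_addr_t h1, h2;
	void *zeroed, *as_is;

	/* regular path: buffer is cleared before it is returned */
	zeroed = dma_alloc_coherent(dev, SZ_64K, &h1, GFP_KERNEL);

	/* non-zeroing path: contents left exactly as found in memory */
	as_is = dma_malloc_coherent(dev, SZ_64K, &h2, GFP_KERNEL);

	if (zeroed)
		dma_free_coherent(dev, SZ_64K, zeroed, h1);
	if (as_is)
		dma_free_coherent(dev, SZ_64K, as_is, h2);
}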

+ 1 - 0
include/linux/platform_data/dmtimer-omap.h

@@ -28,6 +28,7 @@ struct omap_dm_timer_ops {
 	int	(*free)(struct omap_dm_timer *timer);
 
 	void	(*enable)(struct omap_dm_timer *timer);
+	int	(*is_enabled)(struct omap_dm_timer *timer);
 	void	(*disable)(struct omap_dm_timer *timer);
 
 	int	(*get_irq)(struct omap_dm_timer *timer);

+ 1 - 0
include/linux/platform_data/iommu-omap.h

@@ -17,6 +17,7 @@ struct iommu_platform_data {
 	int (*assert_reset)(struct platform_device *pdev, const char *name);
 	int (*deassert_reset)(struct platform_device *pdev, const char *name);
 	int (*device_enable)(struct platform_device *pdev);
+	bool (*device_is_enabled)(struct platform_device *pdev);
 	int (*device_idle)(struct platform_device *pdev);
 	int (*set_pwrdm_constraint)(struct platform_device *pdev, bool request,
 				    u8 *pwrst);

+ 2 - 0
include/linux/platform_data/remoteproc-omap.h

@@ -15,10 +15,12 @@ struct platform_device;
  * struct omap_rproc_pdata - omap remoteproc's platform data
  * @device_enable: omap-specific handler for enabling a device
  * @device_shutdown: omap-specific handler for shutting down a device
+ * @device_is_enabled: omap-specific handler to check if device is booted
  */
 struct omap_rproc_pdata {
 	int (*device_enable)(struct platform_device *pdev);
 	int (*device_shutdown)(struct platform_device *pdev);
+	bool (*device_is_enabled)(struct platform_device *pdev);
 };
 
 #endif /* _PLAT_REMOTEPROC_H */

+ 2 - 0
include/linux/remoteproc.h

@@ -525,6 +525,7 @@ struct rproc_dump_segment {
  * @deny_sysfs_ops: flag to not permit sysfs operations on state and firmware
  * @skip_firmware_request: flag to skip requesting the firmware
  * @skip_load: flag to skip the loading of firmware segments
+ * @late_attach: flag indicating remote core has been externally pre-booted
  * @dump_segments: list of segments in the firmware
  */
 struct rproc {
@@ -563,6 +564,7 @@ struct rproc {
 	unsigned int deny_sysfs_ops		: 1;
 	unsigned int skip_firmware_request	: 1;
 	unsigned int skip_load			: 1;
+	unsigned int late_attach		: 1;
 	struct list_head dump_segments;
 };
 

+ 6 - 5
kernel/dma/coherent.c

@@ -161,7 +161,7 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 
 static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
-		ssize_t size, dma_addr_t *dma_handle)
+		ssize_t size, dma_addr_t *dma_handle, bool zero)
 {
 	int order = get_order(size);
 	unsigned long flags;
@@ -183,7 +183,8 @@ static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
 	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	ret = mem->virt_base + (pageno << PAGE_SHIFT);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
-	memset(ret, 0, size);
+	if (zero)
+		memset(ret, 0, size);
 	return ret;
 err:
 	spin_unlock_irqrestore(&mem->spinlock, flags);
@@ -205,14 +206,14 @@ err:
  * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
  */
 int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
-		dma_addr_t *dma_handle, void **ret)
+		dma_addr_t *dma_handle, void **ret, bool zero)
 {
 	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
 	if (!mem)
 		return 0;
 
-	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+	*ret = __dma_alloc_from_coherent(mem, size, dma_handle, zero);
 	if (*ret)
 		return 1;
 
@@ -231,7 +232,7 @@ void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
 		return NULL;
 
 	return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
-			dma_handle);
+			dma_handle, true);
 }
 
 static int __dma_release_from_coherent(struct dma_coherent_mem *mem,