
Merge branch 'for-linus' of git://git.infradead.org/users/vkoul/slave-dma

Pull dmaengine updates from Vinod Koul:
 "Main features this time are:

   - BAM v1.3.0 support for qcom bam dma
   - support for Allwinner sun8i dma
   - Atmel's eXtended DMA Controller driver
   - chancnt cleanup by Maxime
   - fixes spread over drivers"

* 'for-linus' of git://git.infradead.org/users/vkoul/slave-dma: (56 commits)
  dmaenegine: Delete a check before free_percpu()
  dmaengine: ioatdma: fix dma mapping errors
  dma: cppi41: add a delay while setting the TD bit
  dma: cppi41: wait longer for the HW to return the descriptor
  dmaengine: fsl-edma: fixup reg offset and hw S/G support in big-endian model
  dmaengine: fsl-edma: fix calculation of remaining bytes
  drivers/dma/pch_dma: declare pch_dma_id_table as static
  dmaengine: ste_dma40: fix error return code
  dma: imx-sdma: clarify about firmware not found error
  Documentation: devicetree: Fix Xilinx VDMA specification
  dmaengine: pl330: update author info
  dmaengine: clarify the issue_pending expectations
  dmaengine: at_xdmac: Add DMA_PRIVATE
  ARM: dts: at_xdmac: fix bad value of dma-cells in documentation
  dmaengine: at_xdmac: fix missing spin_unlock
  dmaengine: at_xdmac: fix a bug in transfer residue computation
  dmaengine: at_xdmac: fix software lockup at_xdmac_tx_status()
  dmaengine: at_xdmac: remove chancnt affectation
  dmaengine: at_xdmac: prefer usage of readl/writel_relaxed
  dmaengine: xdmac: fix print warning on dma_addr_t variable
  ...
Linus Torvalds, 10 years ago
parent commit 87c779baab
47 changed files with 2415 additions and 270 deletions
  1. Documentation/devicetree/bindings/dma/atmel-xdma.txt (+54 -0)
  2. Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt (+1 -0)
  3. Documentation/devicetree/bindings/dma/qcom_bam_dma.txt (+3 -1)
  4. Documentation/devicetree/bindings/dma/sun6i-dma.txt (+1 -1)
  5. Documentation/dmaengine/client.txt (+0 -0, renamed from Documentation/dmaengine.txt)
  6. Documentation/dmaengine/dmatest.txt (+0 -0, renamed from Documentation/dmatest.txt)
  7. Documentation/dmaengine/provider.txt (+366 -0)
  8. MAINTAINERS (+9 -1)
  9. drivers/dma/Kconfig (+9 -2)
  10. drivers/dma/Makefile (+1 -0)
  11. drivers/dma/amba-pl08x.c (+0 -2)
  12. drivers/dma/at_xdmac.c (+1524 -0)
  13. drivers/dma/bcm2835-dma.c (+0 -3)
  14. drivers/dma/cppi41.c (+8 -4)
  15. drivers/dma/dma-jz4740.c (+1 -3)
  16. drivers/dma/dmaengine.c (+1 -2)
  17. drivers/dma/fsl-edma.c (+97 -94)
  18. drivers/dma/fsldma.c (+0 -1)
  19. drivers/dma/imx-sdma.c (+4 -2)
  20. drivers/dma/ioat/dma_v3.c (+29 -6)
  21. drivers/dma/iop-adma.c (+0 -1)
  22. drivers/dma/k3dma.c (+2 -2)
  23. drivers/dma/mmp_pdma.c (+0 -1)
  24. drivers/dma/mmp_tdma.c (+0 -1)
  25. drivers/dma/mpc512x_dma.c (+7 -6)
  26. drivers/dma/nbpfaxi.c (+0 -1)
  27. drivers/dma/omap-dma.c (+0 -2)
  28. drivers/dma/pch_dma.c (+1 -1)
  29. drivers/dma/pl330.c (+4 -3)
  30. drivers/dma/qcom_bam_dma.c (+161 -70)
  31. drivers/dma/s3c24xx-dma.c (+0 -1)
  32. drivers/dma/sa11x0-dma.c (+1 -2)
  33. drivers/dma/sh/rcar-audmapp.c (+0 -3)
  34. drivers/dma/sh/rcar-hpbdma.c (+0 -3)
  35. drivers/dma/sh/shdma-base.c (+3 -1)
  36. drivers/dma/sh/shdma-of.c (+0 -1)
  37. drivers/dma/sh/shdmac.c (+0 -2)
  38. drivers/dma/sh/sudmac.c (+0 -3)
  39. drivers/dma/sirf-dma.c (+3 -2)
  40. drivers/dma/ste_dma40.c (+1 -0)
  41. drivers/dma/sun6i-dma.c (+87 -35)
  42. drivers/dma/tegra20-apb-dma.c (+0 -1)
  43. drivers/dma/timb_dma.c (+0 -1)
  44. drivers/dma/xilinx/xilinx_vdma.c (+9 -4)
  45. include/dt-bindings/dma/at91.h (+25 -0)
  46. include/linux/dmaengine.h (+2 -1)
  47. include/linux/platform_data/dma-imx.h (+1 -0)

+ 54 - 0
Documentation/devicetree/bindings/dma/atmel-xdma.txt

@@ -0,0 +1,54 @@
+* Atmel Extensible Direct Memory Access Controller (XDMAC)
+
+* XDMA Controller
+Required properties:
+- compatible: Should be "atmel,<chip>-dma".
+  <chip> compatible description:
+  - sama5d4: first SoC adding the XDMAC
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain DMA interrupt.
+- #dma-cells: Must be <1>, used to represent the number of integer cells in
+the dmas property of client devices.
+  - The 1st cell specifies the channel configuration register:
+    - bit 13: SIF, source interface identifier, used to get the memory
+    interface identifier,
+    - bit 14: DIF, destination interface identifier, used to get the peripheral
+    interface identifier,
+    - bits 30-24: PERID, peripheral identifier.
+
+Example:
+
+dma1: dma-controller@f0004000 {
+	compatible = "atmel,sama5d4-dma";
+	reg = <0xf0004000 0x200>;
+	interrupts = <50 4 0>;
+	#dma-cells = <1>;
+};
+
+
+* DMA clients
+DMA clients connected to the Atmel XDMA controller must use the format
+described in the dma.txt file, using a one-cell specifier for each channel.
+The cells, in order, are:
+1. A phandle pointing to the DMA controller.
+2. The channel configuration register. Configurable fields are:
+    - bit 13: SIF, source interface identifier, used to get the memory
+    interface identifier,
+    - bit 14: DIF, destination interface identifier, used to get the peripheral
+    interface identifier,
+    - bits 30-24: PERID, peripheral identifier.
+
+Example:
+
+i2c2: i2c@f8024000 {
+	compatible = "atmel,at91sam9x5-i2c";
+	reg = <0xf8024000 0x4000>;
+	interrupts = <34 4 6>;
+	dmas = <&dma1
+		(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+		 | AT91_XDMAC_DT_PERID(6))>,
+	       <&dma1
+		(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+		| AT91_XDMAC_DT_PERID(7))>;
+	dma-names = "tx", "rx";
+};
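+
+For reference, the AT91_XDMAC_DT_* helpers used above are defined in
+include/dt-bindings/dma/at91.h. Assuming the exact bit layout described
+in this binding (the real header may be written differently), they look
+roughly like:
+
+	/* Sketch of the specifier-cell helpers, per the layout above. */
+	#define AT91_XDMAC_DT_MEM_IF(i)	(((i) & 0x1) << 13)	/* SIF */
+	#define AT91_XDMAC_DT_PER_IF(i)	(((i) & 0x1) << 14)	/* DIF */
+	#define AT91_XDMAC_DT_PERID(i)	(((i) & 0x7f) << 24)	/* PERID */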

+ 1 - 0
Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt

@@ -48,6 +48,7 @@ The full ID of peripheral types can be found below.
 	21	ESAI
 	22	SSI Dual FIFO	(needs firmware ver >= 2)
 	23	Shared ASRC
+	24	SAI
 
 The third cell specifies the transfer priority as below.
 

+ 3 - 1
Documentation/devicetree/bindings/dma/qcom_bam_dma.txt

@@ -1,7 +1,9 @@
 QCOM BAM DMA controller
 
 Required properties:
-- compatible: must contain "qcom,bam-v1.4.0" for MSM8974
+- compatible: must be one of the following:
+ * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
+ * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
 - reg: Address range for DMA registers
 - interrupts: Should contain the one interrupt shared by all channels
 - #dma-cells: must be <1>, the cell in the dmas property of the client device

+ 1 - 1
Documentation/devicetree/bindings/dma/sun6i-dma.txt

@@ -4,7 +4,7 @@ This driver follows the generic DMA bindings defined in dma.txt.
 
 Required properties:
 
-- compatible:	Must be "allwinner,sun6i-a31-dma"
+- compatible:	Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma"
 - reg:		Should contain the registers base address and length
 - interrupts:	Should contain a reference to the interrupt used by this device
 - clocks:	Should contain a reference to the parent AHB clock

+ 0 - 0
Documentation/dmaengine.txt → Documentation/dmaengine/client.txt


+ 0 - 0
Documentation/dmatest.txt → Documentation/dmaengine/dmatest.txt


+ 366 - 0
Documentation/dmaengine/provider.txt

@@ -0,0 +1,366 @@
+DMAengine controller documentation
+==================================
+
+Hardware Introduction
++++++++++++++++++++++
+
+Most of the Slave DMA controllers have the same general principles of
+operation.
+
+They have a given number of channels to use for the DMA transfers, and
+a given number of request lines.
+
+Requests and channels are pretty much orthogonal: a channel can be
+used to serve several different requests. To simplify, channels are
+the entities that will be doing the copy, and requests identify the
+endpoints involved.
+
+The request lines actually correspond to physical lines going from the
+DMA-eligible devices to the controller itself. Whenever a device wants
+to start a transfer, it asserts a DMA request (DRQ) by asserting that
+request line.
+
+A very simple DMA controller would only take into account a single
+parameter: the transfer size. At each clock cycle, it would transfer a
+byte of data from one buffer to another, until the transfer size has
+been reached.
+
+That wouldn't work well in the real world, since slave devices might
+require a specific number of bits to be transferred in a single
+cycle. For example, we may want to transfer as much data as the
+physical bus allows in order to maximize performance when doing a simple
+memory copy operation, but our audio device could have a narrower FIFO
+that requires data to be written exactly 16 or 24 bits at a time. This
+is why most if not all of the DMA controllers can adjust this, using a
+parameter called the transfer width.
+
+Moreover, some DMA controllers, whenever the RAM is used as a source
+or destination, can group the reads or writes in memory into a buffer,
+so instead of having a lot of small memory accesses, which is not
+really efficient, you'll get several bigger transfers. This is done
+using a parameter called the burst size, which defines how many single
+reads/writes may be done before the controller splits the
+transfer into smaller sub-transfers.
+
+Our theoretical DMA controller would then only be able to do transfers
+that involve a single contiguous block of data. However, many of the
+transfers we usually need are not, and instead copy data from
+non-contiguous buffers to a contiguous buffer, which is called
+scatter-gather.
+
+DMAEngine, at least for mem2dev transfers, requires support for
+scatter-gather. So we're left with two cases here: either we have a
+quite simple DMA controller that doesn't support it, and we'll have to
+implement it in software, or we have a more advanced DMA controller
+that implements scatter-gather in hardware.
+
+The latter are usually programmed using a collection of chunks to
+transfer, and whenever the transfer is started, the controller will go
+over that collection, doing whatever we programmed there.
+
+This collection is usually either a table or a linked list. You will
+then push either the address of the table and its number of elements,
+or the first item of the list, to one channel of the DMA controller,
+and whenever a DRQ is asserted, the controller will go through the
+collection to know where to fetch the data from.
+
+Either way, the format of this collection is completely dependent on
+your hardware. Each DMA controller will require a different structure,
+but all of them will require, for every chunk, at least the source and
+destination addresses, whether it should increment these addresses or
+not and the three parameters we saw earlier: the burst size, the
+transfer width and the transfer size.
+
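+To make the shape of such a collection concrete, here is a purely
+illustrative linked-list descriptor; as said above, every controller
+defines its own layout, so none of these fields are authoritative:
+
+	/* Illustrative only: one chunk of a scatter-gather list. */
+	struct hw_desc {
+		u32 src_addr;	/* where to read this chunk from */
+		u32 dst_addr;	/* where to write this chunk to */
+		u32 ctrl;	/* transfer size, transfer width, burst
+				 * size and address-increment flags */
+		u32 next;	/* bus address of the next descriptor,
+				 * or 0 to terminate the list */
+	};
+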
+One last thing: slave devices usually won't issue DRQs by default,
+and you have to enable this in your slave device driver first
+whenever you want to use DMA.
+
+These were just the general memory-to-memory (also called mem2mem) or
+memory-to-device (mem2dev) kinds of transfers. Most devices also
+support other kinds of transfers or memory operations that dmaengine
+supports; these will be detailed later in this document.
+
+DMA Support in Linux
+++++++++++++++++++++
+
+Historically, DMA controller drivers have been implemented using the
+async TX API, to offload operations such as memory copy, XOR,
+cryptography, etc., basically any memory to memory operation.
+
+Over time, the need for memory to device transfers arose, and
+dmaengine was extended. Nowadays, the async TX API is written as a
+layer on top of dmaengine, and acts as a client. Still, dmaengine
+accommodates that API in some cases, and made some design choices to
+ensure that it stayed compatible.
+
+For more information on the Async TX API, please refer to the relevant
+documentation file in Documentation/crypto/async-tx-api.txt.
+
+DMAEngine Registration
+++++++++++++++++++++++
+
+struct dma_device Initialization
+--------------------------------
+
+Just like any other kernel framework, the whole DMAEngine registration
+relies on the driver filling a structure and registering against the
+framework. In our case, that structure is dma_device.
+
+The first thing you need to do in your driver is to allocate this
+structure. Any of the usual memory allocators will do, but you'll also
+need to initialize a few fields in there:
+
+  * channels:	should be initialized as a list using the
+		INIT_LIST_HEAD macro for example
+
+  * dev: 	should hold the pointer to the struct device associated
+		with your current driver instance.
+
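+A minimal sketch of that initialization, for a hypothetical platform
+driver called foo (the foo names are illustrative, not a real API),
+where struct foo_dma embeds a struct dma_device named dma:
+
+	static int foo_probe(struct platform_device *pdev)
+	{
+		struct foo_dma *fdev;
+
+		fdev = devm_kzalloc(&pdev->dev, sizeof(*fdev), GFP_KERNEL);
+		if (!fdev)
+			return -ENOMEM;
+
+		/* The list of channels this controller will expose. */
+		INIT_LIST_HEAD(&fdev->dma.channels);
+		/* The struct device backing this driver instance. */
+		fdev->dma.dev = &pdev->dev;
+
+		/* ... capabilities and callbacks are filled in below ... */
+		return 0;
+	}
+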
+Supported transaction types
+---------------------------
+
+The next thing you need is to set which transaction types your device
+(and driver) supports.
+
+Our dma_device structure has a field called cap_mask that holds the
+various types of transactions supported, and you need to modify this
+mask using the dma_cap_set function, passing as arguments the flags
+for the transaction types you support.
+
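+For example, a minimal sketch reusing the hypothetical fdev instance
+from the previous example:
+
+	dma_cap_set(DMA_SLAVE, fdev->dma.cap_mask);	/* slave transfers */
+	dma_cap_set(DMA_CYCLIC, fdev->dma.cap_mask);	/* cyclic, e.g. audio */
+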
+All those capabilities are defined in the dma_transaction_type enum,
+in include/linux/dmaengine.h
+
+Currently, the types available are:
+  * DMA_MEMCPY
+    - The device is able to do memory to memory copies
+
+  * DMA_XOR
+    - The device is able to perform XOR operations on memory areas
+    - Used to accelerate XOR intensive tasks, such as RAID5
+
+  * DMA_XOR_VAL
+    - The device is able to perform parity check using the XOR
+      algorithm against a memory buffer.
+
+  * DMA_PQ
+    - The device is able to perform RAID6 P+Q computations, P being a
+      simple XOR, and Q being a Reed-Solomon algorithm.
+
+  * DMA_PQ_VAL
+    - The device is able to perform parity check using RAID6 P+Q
+      algorithm against a memory buffer.
+
+  * DMA_INTERRUPT
+    - The device is able to trigger a dummy transfer that will
+      generate periodic interrupts
+    - Used by the client drivers to register a callback that will be
+      called on a regular basis through the DMA controller interrupt
+
+  * DMA_SG
+    - The device supports memory to memory scatter-gather
+      transfers.
+    - Even though a plain memcpy can look like a particular case of a
+      scatter-gather transfer, with a single chunk to transfer, it's a
+      distinct transaction type in the mem2mem transfers case
+
+  * DMA_PRIVATE
+    - The device only supports slave transfers, and as such isn't
+      available for async transfers.
+
+  * DMA_ASYNC_TX
+    - Must not be set by the device, and will be set by the framework
+      if needed
+    - /* TODO: What is it about? */
+
+  * DMA_SLAVE
+    - The device can handle device to memory transfers, including
+      scatter-gather transfers.
+    - While in the mem2mem case there are two distinct types to
+      deal with a single chunk to copy or a collection of them, here
+      we just have a single transaction type that is supposed to
+      handle both.
+    - If you want to transfer a single contiguous memory buffer,
+      simply build a scatter list with only one item.
+
+  * DMA_CYCLIC
+    - The device can handle cyclic transfers.
+    - A cyclic transfer is a transfer where the chunk collection will
+      loop over itself, with the last item pointing to the first.
+    - It's usually used for audio transfers, where you want to operate
+      on a single ring buffer that you will fill with your audio data.
+
+  * DMA_INTERLEAVE
+    - The device supports interleaved transfers.
+    - These transfers can transfer data from a non-contiguous buffer
+      to a non-contiguous buffer, as opposed to DMA_SLAVE, which can
+      transfer data from a non-contiguous data set to a contiguous
+      destination buffer.
+    - It's usually used for 2D content transfers, in which case you
+      want to transfer a portion of uncompressed data directly to the
+      display to show it.
+
+These various types will also affect how the source and destination
+addresses change over time.
+
+Addresses pointing to RAM are typically incremented (or decremented)
+after each transfer. In case of a ring buffer, they may loop
+(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO)
+are typically fixed.
+
+Device operations
+-----------------
+
+Our dma_device structure also requires a few function pointers in
+order to implement the actual logic, now that we have described the
+operations we are able to perform.
+
+The functions that we have to fill in there, and hence have to
+implement, obviously depend on the transaction types you reported as
+supported.
+
+   * device_alloc_chan_resources
+   * device_free_chan_resources
+     - These functions will be called whenever a driver calls
+       dma_request_channel or dma_release_channel for the first/last
+       time on the channel associated with that driver.
+     - They are in charge of allocating/freeing all the needed
+       resources in order for that channel to be useful for your
+       driver.
+     - These functions can sleep.
+
+   * device_prep_dma_*
+     - These functions match the capabilities you registered
+       previously.
+     - These functions all take the buffer or the scatterlist relevant
+       for the transfer being prepared, and should create a hardware
+       descriptor or a list of hardware descriptors from it
+     - These functions can be called from an interrupt context
+     - Any allocation you might do should be using the GFP_NOWAIT
+       flag, in order not to potentially sleep, but without depleting
+       the emergency pool either.
+     - Drivers should try to pre-allocate any memory they might need
+       during the transfer setup at probe time to avoid putting too
+       much pressure on the nowait allocator.
+
+     - It should return a unique instance of the
+       dma_async_tx_descriptor structure, that further represents this
+       particular transfer.
+
+     - This structure can be initialized using the function
+       dma_async_tx_descriptor_init.
+     - You'll also need to set two fields in this structure:
+       + flags:
+		TODO: Can it be modified by the driver itself, or
+		should it be always the flags passed in the arguments
+
+       + tx_submit:	A pointer to a function you have to implement,
+			that is supposed to push the current
+			transaction descriptor to a pending queue, waiting
+			for issue_pending to be called.
+
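+     A sketch of such a callback for the hypothetical foo driver
+     introduced earlier (foo_desc, foo_alloc_desc and foo_tx_submit are
+     illustrative stand-ins for driver-specific code):
+
+	static struct dma_async_tx_descriptor *
+	foo_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			  unsigned int sg_len, enum dma_transfer_direction dir,
+			  unsigned long flags, void *context)
+	{
+		/* Callable from interrupt context: no sleeping allocation. */
+		struct foo_desc *d = foo_alloc_desc(chan, GFP_NOWAIT);
+
+		if (!d)
+			return NULL;
+
+		/* ... build the hardware descriptor list from sgl ... */
+
+		dma_async_tx_descriptor_init(&d->tx, chan);
+		d->tx.flags = flags;		 /* flags passed by the client */
+		d->tx.tx_submit = foo_tx_submit; /* queues d until issue_pending */
+		return &d->tx;
+	}
+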
+   * device_issue_pending
+     - Takes the first transaction descriptor in the pending queue,
+       and starts the transfer. Whenever that transfer is done, it
+       should move to the next transaction in the list.
+     - This function can be called in an interrupt context
+
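+     A sketch, with the same hypothetical foo helpers:
+
+	static void foo_issue_pending(struct dma_chan *chan)
+	{
+		struct foo_chan *fc = to_foo_chan(chan);
+		unsigned long flags;
+
+		/* May be called in interrupt context. */
+		spin_lock_irqsave(&fc->lock, flags);
+		if (!fc->active && !list_empty(&fc->pending))
+			foo_start_next(fc);	/* hand the first pending
+						 * descriptor to the hardware */
+		spin_unlock_irqrestore(&fc->lock, flags);
+	}
+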
+   * device_tx_status
+     - Should report the bytes left to go over on the given channel
+     - Should only care about the transaction descriptor passed as
+       argument, not the currently active one on a given channel
+     - The tx_state argument might be NULL
+     - Should use dma_set_residue to report it
+     - In the case of a cyclic transfer, it should only take into
+       account the current period.
+     - This function can be called in an interrupt context.
+
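+     A sketch of the usual shape of this callback; dma_cookie_status and
+     dma_set_residue are framework helpers, foo_residue is a hypothetical
+     stand-in for the driver-specific computation:
+
+	static enum dma_status
+	foo_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		      struct dma_tx_state *txstate)
+	{
+		enum dma_status ret = dma_cookie_status(chan, cookie, txstate);
+
+		if (ret == DMA_COMPLETE || !txstate)	/* txstate may be NULL */
+			return ret;
+
+		/* Bytes left for the descriptor matching this cookie only. */
+		dma_set_residue(txstate, foo_residue(chan, cookie));
+		return ret;
+	}
+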
+   * device_control
+     - Used by client drivers to control and configure the channel it
+       has a handle on.
+     - Called with a command and an argument
+       + The command is one of the values listed by the enum
+         dma_ctrl_cmd. The valid commands are:
+         + DMA_PAUSE
+           + Pauses a transfer on the channel
+           + This command should operate synchronously on the channel,
+             pausing right away the work of the given channel
+         + DMA_RESUME
+           + Restarts a transfer on the channel
+           + This command should operate synchronously on the channel,
+             resuming right away the work of the given channel
+         + DMA_TERMINATE_ALL
+           + Aborts all the pending and ongoing transfers on the
+             channel
+           + This command should operate synchronously on the channel,
+             terminating right away all the work of the given channel
+         + DMA_SLAVE_CONFIG
+           + Reconfigures the channel with passed configuration
+           + This command should NOT be applied synchronously, nor on any
+             currently queued transfers, but only on subsequent ones
+           + In this case, the function will receive a
+             dma_slave_config structure pointer as an argument, that
+             will detail which configuration to use.
+           + Even though that structure contains a direction field,
+             this field is deprecated in favor of the direction
+             argument given to the prep_* functions
+         + FSLDMA_EXTERNAL_START
+           + TODO: Why does that even exist?
+       + The argument is an opaque unsigned long. This actually is a
+         pointer to a struct dma_slave_config that should be used only
+         in the DMA_SLAVE_CONFIG case.
+
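+     A sketch of the dispatch, again with hypothetical foo helpers:
+
+	static int foo_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			       unsigned long arg)
+	{
+		switch (cmd) {
+		case DMA_SLAVE_CONFIG:
+			/* arg is really a struct dma_slave_config pointer;
+			 * it must only affect subsequent transfers. */
+			return foo_slave_config(chan,
+					(struct dma_slave_config *)arg);
+		case DMA_PAUSE:
+			return foo_pause(chan);		/* synchronous */
+		case DMA_RESUME:
+			return foo_resume(chan);	/* synchronous */
+		case DMA_TERMINATE_ALL:
+			return foo_terminate_all(chan);	/* aborts pending and
+							 * ongoing transfers */
+		default:
+			return -ENXIO;
+		}
+	}
+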
+  * device_slave_caps
+    - Called through the framework by client drivers in order to get
+      an idea of the properties of the channel allocated to
+      them.
+    - Such properties are the buswidth, available directions, etc.
+    - Required for every generic layer doing DMA transfers, such as
+      ASoC.
+
+Misc notes (stuff that should be documented, but we don't really know
+where to put it)
+------------------------------------------------------------------
+  * dma_run_dependencies
+    - Should be called at the end of an async TX transfer, and can be
+      ignored in the slave transfers case.
+    - Makes sure that dependent operations are run before marking it
+      as complete.
+
+  * dma_cookie_t
+    - It's a DMA transaction ID that will increment over time.
+    - Not really relevant any more since the introduction of virt-dma
+      that abstracts it away.
+
+  * DMA_CTRL_ACK
+    - Undocumented feature
+    - No one really has an idea of what it's about, besides being
+      related to reusing the DMA transaction descriptors or having
+      additional transactions added to it in the async-tx API
+    - Useless in the case of the slave API
+
+General Design Notes
+--------------------
+
+Most of the DMAEngine drivers you'll see are based on a similar design
+that handles the end-of-transfer interrupts in the handler, but defers
+most work to a tasklet, including the start of a new transfer whenever
+the previous transfer ended.
+
+This is a rather inefficient design though, because the inter-transfer
+latency will be not only the interrupt latency, but also the
+scheduling latency of the tasklet, which will leave the channel idle
+in between and slow down the global transfer rate.
+
+You should avoid this kind of practice, and instead of selecting a new
+transfer in your tasklet, move that part to the interrupt handler in
+order to keep the unavoidable idle window as short as possible.
+
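+A sketch of that preferred structure, with hypothetical foo helpers:
+the next transfer is started from the interrupt handler itself, and
+only the completion callbacks are deferred to the tasklet:
+
+	static irqreturn_t foo_interrupt(int irq, void *dev_id)
+	{
+		struct foo_chan *fc = dev_id;
+
+		spin_lock(&fc->lock);
+		if (foo_xfer_done(fc)) {
+			list_move_tail(&fc->active->node, &fc->completed);
+			fc->active = NULL;
+			if (!list_empty(&fc->pending))
+				foo_start_next(fc);	/* no idle gap waiting
+							 * for the tasklet */
+			tasklet_schedule(&fc->tasklet);	/* callbacks run later */
+		}
+		spin_unlock(&fc->lock);
+		return IRQ_HANDLED;
+	}
+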
+Glossary
+--------
+
+Burst: 		A number of consecutive read or write operations
+		that can be queued to buffers before being flushed to
+		memory.
+Chunk:		A contiguous collection of bursts
+Transfer:	A collection of chunks (be it contiguous or not)

+ 9 - 1
MAINTAINERS

@@ -1722,6 +1722,13 @@ F:	drivers/dma/at_hdmac.c
 F:	drivers/dma/at_hdmac_regs.h
 F:	include/linux/platform_data/dma-atmel.h
 
+ATMEL XDMA DRIVER
+M:	Ludovic Desroches <ludovic.desroches@atmel.com>
+L:	linux-arm-kernel@lists.infradead.org
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	drivers/dma/at_xdmac.c
+
 ATMEL I2C DRIVER
 M:	Ludovic Desroches <ludovic.desroches@atmel.com>
 L:	linux-i2c@vger.kernel.org
@@ -3162,7 +3169,8 @@ Q:	https://patchwork.kernel.org/project/linux-dmaengine/list/
 S:	Maintained
 F:	drivers/dma/
 F:	include/linux/dma*
-T:	git git://git.infradead.org/users/vkoul/slave-dma.git (slave-dma)
+F:	Documentation/dmaengine/
+T:	git git://git.infradead.org/users/vkoul/slave-dma.git
 
 DME1737 HARDWARE MONITOR DRIVER
 M:	Juerg Haefliger <juergh@gmail.com>

+ 9 - 2
drivers/dma/Kconfig

@@ -107,6 +107,13 @@ config AT_HDMAC
 	help
 	  Support the Atmel AHB DMA controller.
 
+config AT_XDMAC
+	tristate "Atmel XDMA support"
+	depends on ARCH_AT91
+	select DMA_ENGINE
+	help
+	  Support the Atmel XDMA controller.
+
 config FSL_DMA
 	tristate "Freescale Elo series DMA support"
 	depends on FSL_SOC
@@ -395,12 +402,12 @@ config XILINX_VDMA
 
 config DMA_SUN6I
 	tristate "Allwinner A31 SoCs DMA support"
-	depends on MACH_SUN6I || COMPILE_TEST
+	depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST
 	depends on RESET_CONTROLLER
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Support for the DMA engine for Allwinner A31 SoCs.
+	  Support for the DMA engine first found in Allwinner A31 SoCs.
 
 config NBPFAXI_DMA
 	tristate "Renesas Type-AXI NBPF DMA support"

+ 1 - 0
drivers/dma/Makefile

@@ -16,6 +16,7 @@ obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_MV_XOR) += mv_xor.o
 obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
 obj-$(CONFIG_MX3_IPU) += ipu/
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_SH_DMAE_BASE) += sh/

+ 0 - 2
drivers/dma/amba-pl08x.c

@@ -2164,7 +2164,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 			 __func__, ret);
 		goto out_no_memcpy;
 	}
-	pl08x->memcpy.chancnt = ret;
 
 	/* Register slave channels */
 	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
@@ -2175,7 +2174,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 				__func__, ret);
 		goto out_no_slave;
 	}
-	pl08x->slave.chancnt = ret;
 
 	ret = dma_async_device_register(&pl08x->memcpy);
 	if (ret) {

+ 1524 - 0
drivers/dma/at_xdmac.c

@@ -0,0 +1,1524 @@
+/*
+ * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
+ *
+ * Copyright (C) 2014 Atmel Corporation
+ *
+ * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/barrier.h>
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+
+#include "dmaengine.h"
+
+/* Global registers */
+#define AT_XDMAC_GTYPE		0x00	/* Global Type Register */
+#define		AT_XDMAC_NB_CH(i)	(((i) & 0x1F) + 1)		/* Number of Channels Minus One */
+#define		AT_XDMAC_FIFO_SZ(i)	(((i) >> 5) & 0x7FF)		/* Number of Bytes */
+#define		AT_XDMAC_NB_REQ(i)	((((i) >> 16) & 0x3F) + 1)	/* Number of Peripheral Requests Minus One */
+#define AT_XDMAC_GCFG		0x04	/* Global Configuration Register */
+#define AT_XDMAC_GWAC		0x08	/* Global Weighted Arbiter Configuration Register */
+#define AT_XDMAC_GIE		0x0C	/* Global Interrupt Enable Register */
+#define AT_XDMAC_GID		0x10	/* Global Interrupt Disable Register */
+#define AT_XDMAC_GIM		0x14	/* Global Interrupt Mask Register */
+#define AT_XDMAC_GIS		0x18	/* Global Interrupt Status Register */
+#define AT_XDMAC_GE		0x1C	/* Global Channel Enable Register */
+#define AT_XDMAC_GD		0x20	/* Global Channel Disable Register */
+#define AT_XDMAC_GS		0x24	/* Global Channel Status Register */
+#define AT_XDMAC_GRS		0x28	/* Global Channel Read Suspend Register */
+#define AT_XDMAC_GWS		0x2C	/* Global Write Suspend Register */
+#define AT_XDMAC_GRWS		0x30	/* Global Channel Read Write Suspend Register */
+#define AT_XDMAC_GRWR		0x34	/* Global Channel Read Write Resume Register */
+#define AT_XDMAC_GSWR		0x38	/* Global Channel Software Request Register */
+#define AT_XDMAC_GSWS		0x3C	/* Global channel Software Request Status Register */
+#define AT_XDMAC_GSWF		0x40	/* Global Channel Software Flush Request Register */
+#define AT_XDMAC_VERSION	0xFFC	/* XDMAC Version Register */
+
+/* Channel relative registers offsets */
+#define AT_XDMAC_CIE		0x00	/* Channel Interrupt Enable Register */
+#define		AT_XDMAC_CIE_BIE	BIT(0)	/* End of Block Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_LIE	BIT(1)	/* End of Linked List Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_DIE	BIT(2)	/* End of Disable Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_FIE	BIT(3)	/* End of Flush Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_RBEIE	BIT(4)	/* Read Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_WBEIE	BIT(5)	/* Write Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_ROIE	BIT(6)	/* Request Overflow Interrupt Enable Bit */
+#define AT_XDMAC_CID		0x04	/* Channel Interrupt Disable Register */
+#define		AT_XDMAC_CID_BID	BIT(0)	/* End of Block Interrupt Disable Bit */
+#define		AT_XDMAC_CID_LID	BIT(1)	/* End of Linked List Interrupt Disable Bit */
+#define		AT_XDMAC_CID_DID	BIT(2)	/* End of Disable Interrupt Disable Bit */
+#define		AT_XDMAC_CID_FID	BIT(3)	/* End of Flush Interrupt Disable Bit */
+#define		AT_XDMAC_CID_RBEID	BIT(4)	/* Read Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_WBEID	BIT(5)	/* Write Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_ROID	BIT(6)	/* Request Overflow Interrupt Disable Bit */
+#define AT_XDMAC_CIM		0x08	/* Channel Interrupt Mask Register */
+#define		AT_XDMAC_CIM_BIM	BIT(0)	/* End of Block Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_LIM	BIT(1)	/* End of Linked List Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_DIM	BIT(2)	/* End of Disable Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_FIM	BIT(3)	/* End of Flush Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_RBEIM	BIT(4)	/* Read Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_WBEIM	BIT(5)	/* Write Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_ROIM	BIT(6)	/* Request Overflow Interrupt Mask Bit */
+#define AT_XDMAC_CIS		0x0C	/* Channel Interrupt Status Register */
+#define		AT_XDMAC_CIS_BIS	BIT(0)	/* End of Block Interrupt Status Bit */
+#define		AT_XDMAC_CIS_LIS	BIT(1)	/* End of Linked List Interrupt Status Bit */
+#define		AT_XDMAC_CIS_DIS	BIT(2)	/* End of Disable Interrupt Status Bit */
+#define		AT_XDMAC_CIS_FIS	BIT(3)	/* End of Flush Interrupt Status Bit */
+#define		AT_XDMAC_CIS_RBEIS	BIT(4)	/* Read Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_WBEIS	BIT(5)	/* Write Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_ROIS	BIT(6)	/* Request Overflow Interrupt Status Bit */
+#define AT_XDMAC_CSA		0x10	/* Channel Source Address Register */
+#define AT_XDMAC_CDA		0x14	/* Channel Destination Address Register */
+#define AT_XDMAC_CNDA		0x18	/* Channel Next Descriptor Address Register */
+#define		AT_XDMAC_CNDA_NDAIF(i)	((i) & 0x1)			/* Channel x Next Descriptor Interface */
+#define		AT_XDMAC_CNDA_NDA(i)	((i) & 0xfffffffc)		/* Channel x Next Descriptor Address */
+#define AT_XDMAC_CNDC		0x1C	/* Channel Next Descriptor Control Register */
+#define		AT_XDMAC_CNDC_NDE		(0x1 << 0)		/* Channel x Next Descriptor Enable */
+#define		AT_XDMAC_CNDC_NDSUP		(0x1 << 1)		/* Channel x Next Descriptor Source Update */
+#define		AT_XDMAC_CNDC_NDDUP		(0x1 << 2)		/* Channel x Next Descriptor Destination Update */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV0	(0x0 << 3)		/* Channel x Next Descriptor View 0 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV1	(0x1 << 3)		/* Channel x Next Descriptor View 1 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV2	(0x2 << 3)		/* Channel x Next Descriptor View 2 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV3	(0x3 << 3)		/* Channel x Next Descriptor View 3 */
+#define AT_XDMAC_CUBC		0x20	/* Channel Microblock Control Register */
+#define AT_XDMAC_CBC		0x24	/* Channel Block Control Register */
+#define AT_XDMAC_CC		0x28	/* Channel Configuration Register */
+#define		AT_XDMAC_CC_TYPE	(0x1 << 0)	/* Channel Transfer Type */
+#define			AT_XDMAC_CC_TYPE_MEM_TRAN	(0x0 << 0)	/* Memory to Memory Transfer */
+#define			AT_XDMAC_CC_TYPE_PER_TRAN	(0x1 << 0)	/* Peripheral to Memory or Memory to Peripheral Transfer */
+#define		AT_XDMAC_CC_MBSIZE_MASK	(0x3 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SINGLE	(0x0 << 1)
+#define			AT_XDMAC_CC_MBSIZE_FOUR		(0x1 << 1)
+#define			AT_XDMAC_CC_MBSIZE_EIGHT	(0x2 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SIXTEEN	(0x3 << 1)
+#define		AT_XDMAC_CC_DSYNC	(0x1 << 4)	/* Channel Synchronization */
+#define			AT_XDMAC_CC_DSYNC_PER2MEM	(0x0 << 4)
+#define			AT_XDMAC_CC_DSYNC_MEM2PER	(0x1 << 4)
+#define		AT_XDMAC_CC_PROT	(0x1 << 5)	/* Channel Protection */
+#define			AT_XDMAC_CC_PROT_SEC		(0x0 << 5)
+#define			AT_XDMAC_CC_PROT_UNSEC		(0x1 << 5)
+#define		AT_XDMAC_CC_SWREQ	(0x1 << 6)	/* Channel Software Request Trigger */
+#define			AT_XDMAC_CC_SWREQ_HWR_CONNECTED	(0x0 << 6)
+#define			AT_XDMAC_CC_SWREQ_SWR_CONNECTED	(0x1 << 6)
+#define		AT_XDMAC_CC_MEMSET	(0x1 << 7)	/* Channel Fill Block of memory */
+#define			AT_XDMAC_CC_MEMSET_NORMAL_MODE	(0x0 << 7)
+#define			AT_XDMAC_CC_MEMSET_HW_MODE	(0x1 << 7)
+#define		AT_XDMAC_CC_CSIZE(i)	((0x7 & (i)) << 8)	/* Channel Chunk Size */
+#define		AT_XDMAC_CC_DWIDTH_OFFSET	11
+#define		AT_XDMAC_CC_DWIDTH_MASK	(0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
+#define		AT_XDMAC_CC_DWIDTH(i)	((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET)	/* Channel Data Width */
+#define			AT_XDMAC_CC_DWIDTH_BYTE		0x0
+#define			AT_XDMAC_CC_DWIDTH_HALFWORD	0x1
+#define			AT_XDMAC_CC_DWIDTH_WORD		0x2
+#define			AT_XDMAC_CC_DWIDTH_DWORD	0x3
+#define		AT_XDMAC_CC_SIF(i)	((0x1 & (i)) << 13)	/* Channel Source Interface Identifier */
+#define		AT_XDMAC_CC_DIF(i)	((0x1 & (i)) << 14)	/* Channel Destination Interface Identifier */
+#define		AT_XDMAC_CC_SAM_MASK	(0x3 << 16)	/* Channel Source Addressing Mode */
+#define			AT_XDMAC_CC_SAM_FIXED_AM	(0x0 << 16)
+#define			AT_XDMAC_CC_SAM_INCREMENTED_AM	(0x1 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_AM		(0x2 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_DS_AM	(0x3 << 16)
+#define		AT_XDMAC_CC_DAM_MASK	(0x3 << 18)	/* Channel Destination Addressing Mode */
+#define			AT_XDMAC_CC_DAM_FIXED_AM	(0x0 << 18)
+#define			AT_XDMAC_CC_DAM_INCREMENTED_AM	(0x1 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_AM		(0x2 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_DS_AM	(0x3 << 18)
+#define		AT_XDMAC_CC_INITD	(0x1 << 21)	/* Channel Initialization Terminated (read only) */
+#define			AT_XDMAC_CC_INITD_TERMINATED	(0x0 << 21)
+#define			AT_XDMAC_CC_INITD_IN_PROGRESS	(0x1 << 21)
+#define		AT_XDMAC_CC_RDIP	(0x1 << 22)	/* Read in Progress (read only) */
+#define			AT_XDMAC_CC_RDIP_DONE		(0x0 << 22)
+#define			AT_XDMAC_CC_RDIP_IN_PROGRESS	(0x1 << 22)
+#define		AT_XDMAC_CC_WRIP	(0x1 << 23)	/* Write in Progress (read only) */
+#define			AT_XDMAC_CC_WRIP_DONE		(0x0 << 23)
+#define			AT_XDMAC_CC_WRIP_IN_PROGRESS	(0x1 << 23)
+#define		AT_XDMAC_CC_PERID(i)	((0x7f & (i)) << 24)	/* Channel Peripheral Identifier */
+#define AT_XDMAC_CDS_MSP	0x2C	/* Channel Data Stride Memory Set Pattern */
+#define AT_XDMAC_CSUS		0x30	/* Channel Source Microblock Stride */
+#define AT_XDMAC_CDUS		0x34	/* Channel Destination Microblock Stride */
+
+#define AT_XDMAC_CHAN_REG_BASE	0x50	/* Channel registers base address */
+
+/* Microblock control members */
+#define AT_XDMAC_MBR_UBC_UBLEN_MAX	0xFFFFFFUL	/* Maximum Microblock Length */
+#define AT_XDMAC_MBR_UBC_NDE		(0x1 << 24)	/* Next Descriptor Enable */
+#define AT_XDMAC_MBR_UBC_NSEN		(0x1 << 25)	/* Next Descriptor Source Update */
+#define AT_XDMAC_MBR_UBC_NDEN		(0x1 << 26)	/* Next Descriptor Destination Update */
+#define AT_XDMAC_MBR_UBC_NDV0		(0x0 << 27)	/* Next Descriptor View 0 */
+#define AT_XDMAC_MBR_UBC_NDV1		(0x1 << 27)	/* Next Descriptor View 1 */
+#define AT_XDMAC_MBR_UBC_NDV2		(0x2 << 27)	/* Next Descriptor View 2 */
+#define AT_XDMAC_MBR_UBC_NDV3		(0x3 << 27)	/* Next Descriptor View 3 */
+
+#define AT_XDMAC_MAX_CHAN	0x20
+
+enum atc_status {
+	AT_XDMAC_CHAN_IS_CYCLIC = 0,
+	AT_XDMAC_CHAN_IS_PAUSED,
+};
+
+/* ----- Channels ----- */
+struct at_xdmac_chan {
+	struct dma_chan			chan;
+	void __iomem			*ch_regs;
+	u32				mask;		/* Channel Mask */
+	u32				cfg[3];		/* Channel Configuration Register */
+	#define	AT_XDMAC_CUR_CFG	0		/* Current channel conf */
+	#define	AT_XDMAC_DEV_TO_MEM_CFG	1		/* Predefined dev to mem channel conf */
+	#define	AT_XDMAC_MEM_TO_DEV_CFG	2		/* Predefined mem to dev channel conf */
+	u8				perid;		/* Peripheral ID */
+	u8				perif;		/* Peripheral Interface */
+	u8				memif;		/* Memory Interface */
+	u32				per_src_addr;
+	u32				per_dst_addr;
+	u32				save_cim;
+	u32				save_cnda;
+	u32				save_cndc;
+	unsigned long			status;
+	struct tasklet_struct		tasklet;
+
+	spinlock_t			lock;
+
+	struct list_head		xfers_list;
+	struct list_head		free_descs_list;
+};
+
+
+/* ----- Controller ----- */
+struct at_xdmac {
+	struct dma_device	dma;
+	void __iomem		*regs;
+	int			irq;
+	struct clk		*clk;
+	u32			save_gim;
+	u32			save_gs;
+	struct dma_pool		*at_xdmac_desc_pool;
+	struct at_xdmac_chan	chan[0];
+};
+
+
+/* ----- Descriptors ----- */
+
+/* Linked List Descriptor */
+struct at_xdmac_lld {
+	dma_addr_t	mbr_nda;	/* Next Descriptor Member */
+	u32		mbr_ubc;	/* Microblock Control Member */
+	dma_addr_t	mbr_sa;		/* Source Address Member */
+	dma_addr_t	mbr_da;		/* Destination Address Member */
+	u32		mbr_cfg;	/* Configuration Register */
+};
+
+
+struct at_xdmac_desc {
+	struct at_xdmac_lld		lld;
+	enum dma_transfer_direction	direction;
+	struct dma_async_tx_descriptor	tx_dma_desc;
+	struct list_head		desc_node;
+	/* Following members are only used by the first descriptor */
+	bool				active_xfer;
+	unsigned int			xfer_size;
+	struct list_head		descs_list;
+	struct list_head		xfer_node;
+};
+
+static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
+{
+	return atxdmac->regs + (AT_XDMAC_CHAN_REG_BASE + chan_nb * 0x40);
+}
+
+#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
+#define at_xdmac_write(atxdmac, reg, value) \
+	writel_relaxed((value), (atxdmac)->regs + (reg))
+
+#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
+#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))
+
+static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_xdmac_chan, chan);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_xdmac, dma);
+}
+
+static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
+}
+
+static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+}
+
+static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+}
+
+static inline int at_xdmac_csize(u32 maxburst)
+{
+	int csize;
+
+	csize = ffs(maxburst) - 1;
+	if (csize > 4)
+		csize = -EINVAL;
+
+	return csize;
+};
+
+static inline u8 at_xdmac_get_dwidth(u32 cfg)
+{
+	return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
+};
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+
+static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
+{
+	return at_xdmac_chan_read(atchan, AT_XDMAC_GS) & atchan->mask;
+}
+
+static void at_xdmac_off(struct at_xdmac *atxdmac)
+{
+	at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);
+
+	/* Wait until all channels are disabled. */
+	while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
+		cpu_relax();
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);
+}
+
+/* Call with lock held. */
+static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+				struct at_xdmac_desc *first)
+{
+	struct at_xdmac	*atxdmac = to_at_xdmac(atchan->chan.device);
+	u32		reg;
+
+	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
+
+	if (at_xdmac_chan_is_enabled(atchan))
+		return;
+
+	/* Set transfer as active to not try to start it again. */
+	first->active_xfer = true;
+
+	/* Tell xdmac where to get the first descriptor. */
+	reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys)
+	      | AT_XDMAC_CNDA_NDAIF(atchan->memif);
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);
+
+	/*
+	 * When doing memory to memory transfer we need to use the next
+	 * descriptor view 2 since some fields of the configuration register
+	 * depend on transfer size and src/dest addresses.
+	 */
+	if (is_slave_direction(first->direction)) {
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
+		if (first->direction == DMA_MEM_TO_DEV)
+			atchan->cfg[AT_XDMAC_CUR_CFG] =
+				atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		else
+			atchan->cfg[AT_XDMAC_CUR_CFG] =
+				atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		at_xdmac_chan_write(atchan, AT_XDMAC_CC,
+				    atchan->cfg[AT_XDMAC_CUR_CFG]);
+	} else {
+		/*
+		 * No need to write AT_XDMAC_CC reg, it will be done when the
+		 * descriptor is fetched.
+		 */
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
+	}
+
+	reg |= AT_XDMAC_CNDC_NDDUP
+	       | AT_XDMAC_CNDC_NDSUP
+	       | AT_XDMAC_CNDC_NDE;
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+	at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
+	reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE | AT_XDMAC_CIE_ROIE;
+	/*
+	 * There is no end of list when doing cyclic DMA, so we need to get
+	 * an interrupt after each period.
+	 */
+	if (at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_BIE);
+	else
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_LIE);
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
+	wmb();
+	at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+}
+
+static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct at_xdmac_desc	*desc = txd_to_at_desc(tx);
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(tx->chan);
+	dma_cookie_t		cookie;
+
+	spin_lock_bh(&atchan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
+		 __func__, atchan, desc);
+	list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+	if (list_is_singular(&atchan->xfers_list))
+		at_xdmac_start_xfer(atchan, desc);
+
+	spin_unlock_bh(&atchan->lock);
+	return cookie;
+}
+
+static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
+						 gfp_t gfp_flags)
+{
+	struct at_xdmac_desc	*desc;
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	dma_addr_t		phys;
+
+	desc = dma_pool_alloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
+	if (desc) {
+		memset(desc, 0, sizeof(*desc));
+		INIT_LIST_HEAD(&desc->descs_list);
+		dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
+		desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
+		desc->tx_dma_desc.phys = phys;
+	}
+
+	return desc;
+}
+
+/* Call must be protected by lock. */
+static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc *desc;
+
+	if (list_empty(&atchan->free_descs_list)) {
+		desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
+	} else {
+		desc = list_first_entry(&atchan->free_descs_list,
+					struct at_xdmac_desc, desc_node);
+		list_del(&desc->desc_node);
+		desc->active_xfer = false;
+	}
+
+	return desc;
+}
+
+static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
+				       struct of_dma *of_dma)
+{
+	struct at_xdmac		*atxdmac = of_dma->of_dma_data;
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan;
+	struct device		*dev = atxdmac->dma.dev;
+
+	if (dma_spec->args_count != 1) {
+		dev_err(dev, "dma phandle args: bad number of args\n");
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&atxdmac->dma);
+	if (!chan) {
+		dev_err(dev, "can't get a dma channel\n");
+		return NULL;
+	}
+
+	atchan = to_at_xdmac_chan(chan);
+	atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
+	atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
+	atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
+	dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
+		 atchan->memif, atchan->perif, atchan->perid);
+
+	return chan;
+}
+
+static int at_xdmac_set_slave_config(struct dma_chan *chan,
+				      struct dma_slave_config *sconfig)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	u8 dwidth;
+	int csize;
+
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] =
+		AT91_XDMAC_DT_PERID(atchan->perid)
+		| AT_XDMAC_CC_DAM_INCREMENTED_AM
+		| AT_XDMAC_CC_SAM_FIXED_AM
+		| AT_XDMAC_CC_DIF(atchan->memif)
+		| AT_XDMAC_CC_SIF(atchan->perif)
+		| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+		| AT_XDMAC_CC_DSYNC_PER2MEM
+		| AT_XDMAC_CC_MBSIZE_SIXTEEN
+		| AT_XDMAC_CC_TYPE_PER_TRAN;
+	csize = at_xdmac_csize(sconfig->src_maxburst);
+	if (csize < 0) {
+		dev_err(chan2dev(chan), "invalid src maxburst value\n");
+		return -EINVAL;
+	}
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_CSIZE(csize);
+	dwidth = ffs(sconfig->src_addr_width) - 1;
+	atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] =
+		AT91_XDMAC_DT_PERID(atchan->perid)
+		| AT_XDMAC_CC_DAM_FIXED_AM
+		| AT_XDMAC_CC_SAM_INCREMENTED_AM
+		| AT_XDMAC_CC_DIF(atchan->perif)
+		| AT_XDMAC_CC_SIF(atchan->memif)
+		| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+		| AT_XDMAC_CC_DSYNC_MEM2PER
+		| AT_XDMAC_CC_MBSIZE_SIXTEEN
+		| AT_XDMAC_CC_TYPE_PER_TRAN;
+	csize = at_xdmac_csize(sconfig->dst_maxburst);
+	if (csize < 0) {
+		dev_err(chan2dev(chan), "invalid dst maxburst value\n");
+		return -EINVAL;
+	}
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_CSIZE(csize);
+	dwidth = ffs(sconfig->dst_addr_width) - 1;
+	atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG] |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+	/* Src and dst addr are needed to configure the link list descriptor. */
+	atchan->per_src_addr = sconfig->src_addr;
+	atchan->per_dst_addr = sconfig->dst_addr;
+
+	dev_dbg(chan2dev(chan),
+		"%s: cfg[dev2mem]=0x%08x, cfg[mem2dev]=0x%08x, per_src_addr=0x%08x, per_dst_addr=0x%08x\n",
+		__func__, atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG],
+		atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG],
+		atchan->per_src_addr, atchan->per_dst_addr);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, enum dma_transfer_direction direction,
+		       unsigned long flags, void *context)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	struct scatterlist	*sg;
+	int			i;
+	u32			cfg;
+	unsigned int		xfer_size = 0;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
+		 __func__, sg_len,
+		 direction == DMA_MEM_TO_DEV ? "to device" : "from device",
+		 flags);
+
+	/* Protect dma_sconfig field that can be modified by set_slave_conf. */
+	spin_lock_bh(&atchan->lock);
+
+	/* Prepare descriptors. */
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct at_xdmac_desc	*desc = NULL;
+		u32			len, mem;
+
+		len = sg_dma_len(sg);
+		mem = sg_dma_address(sg);
+		if (unlikely(!len)) {
+			dev_err(chan2dev(chan), "sg data length is zero\n");
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+		dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
+			 __func__, i, len, mem);
+
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+
+		/* Linked list descriptor setup. */
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->per_src_addr;
+			desc->lld.mbr_da = mem;
+			cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		} else {
+			desc->lld.mbr_sa = mem;
+			desc->lld.mbr_da = atchan->per_dst_addr;
+			cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		}
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1		/* next descriptor view */
+			| AT_XDMAC_MBR_UBC_NDEN				/* next descriptor dst parameter update */
+			| AT_XDMAC_MBR_UBC_NSEN				/* next descriptor src parameter update */
+			| (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE)	/* descriptor fetch */
+			| len / (1 << at_xdmac_get_dwidth(cfg));	/* microblock length */
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+				 __func__, prev, &prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+		xfer_size += len;
+	}
+
+	spin_unlock_bh(&atchan->lock);
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = xfer_size;
+	first->direction = direction;
+
+	return &first->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			 size_t buf_len, size_t period_len,
+			 enum dma_transfer_direction direction,
+			 unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	unsigned int		periods = buf_len / period_len;
+	int			i;
+	u32			cfg;
+
+	dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
+		__func__, &buf_addr, buf_len, period_len,
+		direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
+		dev_err(chan2dev(chan), "channel currently used\n");
+		return NULL;
+	}
+
+	for (i = 0; i < periods; i++) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		spin_lock_bh(&atchan->lock);
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			spin_unlock_bh(&atchan->lock);
+			return NULL;
+		}
+		spin_unlock_bh(&atchan->lock);
+		dev_dbg(chan2dev(chan),
+			"%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
+			__func__, desc, &desc->tx_dma_desc.phys);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->per_src_addr;
+			desc->lld.mbr_da = buf_addr + i * period_len;
+			cfg = atchan->cfg[AT_XDMAC_DEV_TO_MEM_CFG];
+		} else {
+			desc->lld.mbr_sa = buf_addr + i * period_len;
+			desc->lld.mbr_da = atchan->per_dst_addr;
+			cfg = atchan->cfg[AT_XDMAC_MEM_TO_DEV_CFG];
+		}
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| AT_XDMAC_MBR_UBC_NDE
+			| period_len >> at_xdmac_get_dwidth(cfg);
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+				 __func__, prev, &prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	prev->lld.mbr_nda = first->tx_dma_desc.phys;
+	dev_dbg(chan2dev(chan),
+		"%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+		__func__, prev, &prev->lld.mbr_nda);
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = buf_len;
+	first->direction = direction;
+
+	return &first->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	size_t			remaining_size = len, xfer_size = 0, ublen;
+	dma_addr_t		src_addr = src, dst_addr = dest;
+	u32			dwidth;
+	/*
+	 * WARNING: We don't know the direction, which means we can't
+	 * dynamically set the source and dest interface so we have to use the
+	 * same one. Only interface 0 allows EBI access. Hopefully we can
+	 * access DDR through both ports (at least on SAMA5D4x), so we can use
+	 * the same interface for source and dest, that solves the fact we
+	 * don't know the direction.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_DAM_INCREMENTED_AM
+					| AT_XDMAC_CC_SAM_INCREMENTED_AM
+					| AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
+		__func__, &src, &dest, len, flags);
+
+	if (unlikely(!len))
+		return NULL;
+
+	/*
+	 * Check address alignment to select the greatest data width we can use.
+	 * Some XDMAC implementations don't provide dword transfer, in this
+	 * case selecting dword has the same behavior as selecting word transfers.
+	 */
+	if (!((src_addr | dst_addr) & 7)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+	} else if (!((src_addr | dst_addr)  & 3)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_WORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+	} else if (!((src_addr | dst_addr) & 1)) {
+		dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+	} else {
+		dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
+		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+	}
+
+	/* Prepare descriptors. */
+	while (remaining_size) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
+
+		spin_lock_bh(&atchan->lock);
+		desc = at_xdmac_get_desc(atchan);
+		spin_unlock_bh(&atchan->lock);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			return NULL;
+		}
+
+		/* Update src and dest addresses. */
+		src_addr += xfer_size;
+		dst_addr += xfer_size;
+
+		if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
+			xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
+		else
+			xfer_size = remaining_size;
+
+		dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);
+
+		/* Check remaining length and change data width if needed. */
+		if (!((src_addr | dst_addr | xfer_size) & 7)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_DWORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+		} else if (!((src_addr | dst_addr | xfer_size)  & 3)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_WORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+		} else if (!((src_addr | dst_addr | xfer_size) & 1)) {
+			dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD;
+			dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+		} else {
+			dwidth = AT_XDMAC_CC_DWIDTH_BYTE;
+			dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+		}
+		chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+		ublen = xfer_size >> dwidth;
+		remaining_size -= xfer_size;
+
+		desc->lld.mbr_sa = src_addr;
+		desc->lld.mbr_da = dst_addr;
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0)
+			| ublen;
+		desc->lld.mbr_cfg = chan_cc;
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+		/* Chain lld. */
+		if (prev) {
+			prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+			dev_dbg(chan2dev(chan),
+				 "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n",
+				 __func__, prev, prev->lld.mbr_nda);
+		}
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
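For reference, a minimal client-side sketch of exercising this memcpy path through the dmaengine API; dst_phys, src_phys, len and the completion callback are hypothetical placeholders, not part of this driver:

	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_async_tx_descriptor *tx;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	/* dst_phys/src_phys are assumed to be already DMA-mapped. */
	tx = chan->device->device_prep_dma_memcpy(chan, dst_phys, src_phys,
						  len, DMA_PREP_INTERRUPT);
	if (!tx) {
		dma_release_channel(chan);
		return -ENOMEM;
	}

	tx->callback = copy_done;	/* hypothetical completion callback */
	dmaengine_submit(tx);
	dma_async_issue_pending(chan);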
+static enum dma_status
+at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	struct at_xdmac_desc	*desc, *_desc;
+	struct list_head	*descs_list;
+	enum dma_status		ret;
+	int			residue;
+	u32			cur_nda, mask, value;
+	u8			dwidth = at_xdmac_get_dwidth(atchan->cfg[AT_XDMAC_CUR_CFG]);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!txstate)
+		return ret;
+
+	spin_lock_bh(&atchan->lock);
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+
+	/*
+	 * If the transfer has not been started yet, there is no need to
+	 * compute the residue: it is simply the transfer length.
+	 */
+	if (!desc->active_xfer) {
+		dma_set_residue(txstate, desc->xfer_size);
+		spin_unlock_bh(&atchan->lock);
+		return ret;
+	}
+
+	residue = desc->xfer_size;
+	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized.
+	 */
+	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
+	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
+	if ((atchan->cfg[AT_XDMAC_CUR_CFG] & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+	/*
+	 * Subtract the size of all microblocks already transferred, including
+	 * the current one. Then add back the remaining size of the current
+	 * microblock.
+	 */
+	descs_list = &desc->descs_list;
+	list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
+		residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
+		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+			break;
+	}
+	residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth;
+
+	spin_unlock_bh(&atchan->lock);
+
+	dma_set_residue(txstate, residue);
+
+	dev_dbg(chan2dev(chan),
+		 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
+		 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
+
+	return ret;
+}
+
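As a worked example of the residue arithmetic above (figures illustrative only): for a 4096-byte transfer carried in word-wide microblocks, dwidth is 2 and one data unit is 1 << 2 = 4 bytes. residue starts at 4096; the loop subtracts UBLEN << 2 bytes for each microblock up to and including the one whose mbr_nda matches the CNDA register, and CUBC << 2 bytes are then added back for the data still pending in that current microblock.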
+/* The caller must hold atchan->lock. */
+static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
+				    struct at_xdmac_desc *desc)
+{
+	dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+
+	/*
+	 * Remove the transfer from the transfer list then move the transfer
+	 * descriptors into the free descriptors list.
+	 */
+	list_del(&desc->xfer_node);
+	list_splice_init(&desc->descs_list, &atchan->free_descs_list);
+}
+
+static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc	*desc;
+
+	spin_lock_bh(&atchan->lock);
+
+	/*
+	 * If the channel is enabled, do nothing: advance_work will be
+	 * triggered again from the interrupt handler.
+	 */
+	if (!at_xdmac_chan_is_enabled(atchan) && !list_empty(&atchan->xfers_list)) {
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		if (!desc->active_xfer)
+			at_xdmac_start_xfer(atchan, desc);
+	}
+
+	spin_unlock_bh(&atchan->lock);
+}
+
+static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc		*desc;
+	struct dma_async_tx_descriptor	*txd;
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+	txd = &desc->tx_dma_desc;
+
+	if (txd->callback && (txd->flags & DMA_PREP_INTERRUPT))
+		txd->callback(txd->callback_param);
+}
+
+static void at_xdmac_tasklet(unsigned long data)
+{
+	struct at_xdmac_chan	*atchan = (struct at_xdmac_chan *)data;
+	struct at_xdmac_desc	*desc;
+	u32			error_mask;
+
+	dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n",
+		 __func__, atchan->status);
+
+	error_mask = AT_XDMAC_CIS_RBEIS
+		     | AT_XDMAC_CIS_WBEIS
+		     | AT_XDMAC_CIS_ROIS;
+
+	if (at_xdmac_chan_is_cyclic(atchan)) {
+		at_xdmac_handle_cyclic(atchan);
+	} else if ((atchan->status & AT_XDMAC_CIS_LIS)
+		   || (atchan->status & error_mask)) {
+		struct dma_async_tx_descriptor  *txd;
+
+		if (atchan->status & AT_XDMAC_CIS_RBEIS)
+			dev_err(chan2dev(&atchan->chan), "read bus error!\n");
+		if (atchan->status & AT_XDMAC_CIS_WBEIS)
+			dev_err(chan2dev(&atchan->chan), "write bus error!\n");
+		if (atchan->status & AT_XDMAC_CIS_ROIS)
+			dev_err(chan2dev(&atchan->chan), "request overflow error!\n");
+
+		spin_lock_bh(&atchan->lock);
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		BUG_ON(!desc->active_xfer);
+
+		txd = &desc->tx_dma_desc;
+
+		at_xdmac_remove_xfer(atchan, desc);
+		spin_unlock_bh(&atchan->lock);
+
+		if (!at_xdmac_chan_is_cyclic(atchan)) {
+			dma_cookie_complete(txd);
+			if (txd->callback && (txd->flags & DMA_PREP_INTERRUPT))
+				txd->callback(txd->callback_param);
+		}
+
+		dma_run_dependencies(txd);
+
+		at_xdmac_advance_work(atchan);
+	}
+}
+
+static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
+{
+	struct at_xdmac		*atxdmac = (struct at_xdmac *)dev_id;
+	struct at_xdmac_chan	*atchan;
+	u32			imr, status, pending;
+	u32			chan_imr, chan_status;
+	int			i, ret = IRQ_NONE;
+
+	do {
+		imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+		status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
+		pending = status & imr;
+
+		dev_vdbg(atxdmac->dma.dev,
+			 "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
+			 __func__, status, imr, pending);
+
+		if (!pending)
+			break;
+
+		/* We have to find which channel has generated the interrupt. */
+		for (i = 0; i < atxdmac->dma.chancnt; i++) {
+			if (!((1 << i) & pending))
+				continue;
+
+			atchan = &atxdmac->chan[i];
+			chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
+			atchan->status = chan_status & chan_imr;
+			dev_vdbg(atxdmac->dma.dev,
+				 "%s: chan%d: imr=0x%x, status=0x%x\n",
+				 __func__, i, chan_imr, chan_status);
+			dev_vdbg(chan2dev(&atchan->chan),
+				 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+				 __func__,
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+			if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
+				at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+
+			tasklet_schedule(&atchan->tasklet);
+			ret = IRQ_HANDLED;
+		}
+
+	} while (pending);
+
+	return ret;
+}
+
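For instance, if pending reads back as 0x5, the loop services channels 0 and 2: each channel's masked interrupt status is cached in atchan->status for the tasklet, the channel is disabled immediately when a read or write bus error is flagged, and the per-channel tasklet is scheduled to do the rest outside hard-irq context.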
+static void at_xdmac_issue_pending(struct dma_chan *chan)
+{
+	struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+	dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
+
+	if (!at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_advance_work(atchan);
+
+	return;
+}
+
+static int at_xdmac_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
+			    unsigned long arg)
+{
+	struct at_xdmac_desc	*desc, *_desc;
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	int			ret = 0;
+
+	dev_dbg(chan2dev(chan), "%s: cmd=%d\n", __func__, cmd);
+
+	spin_lock_bh(&atchan->lock);
+
+	switch (cmd) {
+	case DMA_PAUSE:
+		at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask);
+		set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+		break;
+
+	case DMA_RESUME:
+		if (!at_xdmac_chan_is_paused(atchan))
+			break;
+
+		at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
+		clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+		break;
+
+	case DMA_TERMINATE_ALL:
+		at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+		while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
+			cpu_relax();
+
+		/* Cancel all pending transfers. */
+		list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node)
+			at_xdmac_remove_xfer(atchan, desc);
+
+		clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+		break;
+
+	case DMA_SLAVE_CONFIG:
+		ret = at_xdmac_set_slave_config(chan,
+				(struct dma_slave_config *)arg);
+		break;
+
+	default:
+		dev_err(chan2dev(chan),
+			"unmanaged or unknown dma control cmd: %d\n", cmd);
+		ret = -ENXIO;
+	}
+
+	spin_unlock_bh(&atchan->lock);
+
+	return ret;
+}
+
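A hedged sketch of how a client reaches these commands through the standard dmaengine wrappers; the slave_config values (fifo_phys, width, burst) are illustrative placeholders:

	struct dma_slave_config cfg = {
		.direction      = DMA_MEM_TO_DEV,
		.dst_addr       = fifo_phys,	/* hypothetical device FIFO */
		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst   = 16,
	};

	dmaengine_slave_config(chan, &cfg);	/* -> DMA_SLAVE_CONFIG */
	dmaengine_pause(chan);			/* -> DMA_PAUSE (GRWS) */
	dmaengine_resume(chan);			/* -> DMA_RESUME (GRWR) */
	dmaengine_terminate_all(chan);		/* -> DMA_TERMINATE_ALL */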
+static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*desc;
+	int			i;
+
+	spin_lock_bh(&atchan->lock);
+
+	if (at_xdmac_chan_is_enabled(atchan)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel enabled)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
+	if (!list_empty(&atchan->free_descs_list)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel not free from a previous use)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = at_xdmac_alloc_desc(chan, GFP_ATOMIC);
+		if (!desc) {
+			dev_warn(chan2dev(chan),
+				"only %d descriptors have been allocated\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &atchan->free_descs_list);
+	}
+
+	dma_cookie_init(chan);
+
+	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+
+spin_unlock:
+	spin_unlock_bh(&atchan->lock);
+	return i;
+}
+
+static void at_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	struct at_xdmac_desc	*desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
+		dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
+		list_del(&desc->desc_node);
+		dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
+	}
+
+	return;
+}
+
+#define AT_XDMAC_DMA_BUSWIDTHS\
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+static int at_xdmac_device_slave_caps(struct dma_chan *dchan,
+				      struct dma_slave_caps *caps)
+{
+	caps->src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	caps->dstn_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	caps->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	caps->cmd_pause = true;
+	caps->cmd_terminate = true;
+	caps->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	return 0;
+}
+
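A client can query these limits up front with dma_get_slave_caps(chan, &caps), for example to reject an unsupported bus width at configuration time rather than having the prep call fail later.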
+#ifdef CONFIG_PM
+static int atmel_xdmac_prepare(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		/* Abort suspend while a non-cyclic transfer is in flight. */
+		if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
+			return -EAGAIN;
+	}
+	return 0;
+}
+#else
+#	define atmel_xdmac_prepare NULL
+#endif
+
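A non-zero return from the ->prepare callback makes the PM core abort the suspend attempt, so an in-flight non-cyclic transfer fails the suspend cleanly instead of being cut off mid-transfer.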
+#ifdef CONFIG_PM_SLEEP
+static int atmel_xdmac_suspend(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			if (!at_xdmac_chan_is_paused(atchan))
+				at_xdmac_control(chan, DMA_PAUSE, 0);
+			atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
+			atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
+		}
+	}
+	atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+	/* Also save the enabled-channel mask, restored as GE on resume. */
+	atxdmac->save_gs = at_xdmac_read(atxdmac, AT_XDMAC_GS);
+
+	at_xdmac_off(atxdmac);
+	clk_disable_unprepare(atxdmac->clk);
+	return 0;
+}
+
+static int atmel_xdmac_resume(struct device *dev)
+{
+	struct platform_device	*pdev = to_platform_device(dev);
+	struct at_xdmac		*atxdmac = platform_get_drvdata(pdev);
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan, *_chan;
+	int			i;
+	u32			cfg;
+
+	clk_prepare_enable(atxdmac->clk);
+
+	/* Clear pending interrupts. */
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		atchan = &atxdmac->chan[i];
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
+	at_xdmac_write(atxdmac, AT_XDMAC_GE, atxdmac->save_gs);
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		atchan = to_at_xdmac_chan(chan);
+		cfg = atchan->cfg[AT_XDMAC_CUR_CFG];
+		at_xdmac_chan_write(atchan, AT_XDMAC_CC, cfg);
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
+			wmb();
+			at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+		}
+	}
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static int at_xdmac_probe(struct platform_device *pdev)
+{
+	struct resource	*res;
+	struct at_xdmac	*atxdmac;
+	int		irq, size, nr_channels, i, ret;
+	void __iomem	*base;
+	u32		reg;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/*
+	 * Read the number of XDMAC channels. The read helper can't be used
+	 * here since atxdmac is not yet allocated, and we need the channel
+	 * count to size that allocation.
+	 */
+	reg = readl_relaxed(base + AT_XDMAC_GTYPE);
+	nr_channels = AT_XDMAC_NB_CH(reg);
+	if (nr_channels > AT_XDMAC_MAX_CHAN) {
+		dev_err(&pdev->dev, "invalid number of channels (%u)\n",
+			nr_channels);
+		return -EINVAL;
+	}
+
+	size = sizeof(*atxdmac);
+	size += nr_channels * sizeof(struct at_xdmac_chan);
+	atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!atxdmac) {
+		dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
+		return -ENOMEM;
+	}
+
+	atxdmac->regs = base;
+	atxdmac->irq = irq;
+
+	atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
+	if (IS_ERR(atxdmac->clk)) {
+		dev_err(&pdev->dev, "can't get dma_clk\n");
+		return PTR_ERR(atxdmac->clk);
+	}
+
+	/* Do not use devm for the IRQ to avoid racing with the tasklet. */
+	ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "can't request irq\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(atxdmac->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "can't prepare or enable clock\n");
+		goto err_free_irq;
+	}
+
+	atxdmac->at_xdmac_desc_pool =
+		dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+				sizeof(struct at_xdmac_desc), 4, 0);
+	if (!atxdmac->at_xdmac_desc_pool) {
+		dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
+	/*
+	 * Without DMA_PRIVATE the driver is not able to allocate more than
+	 * one channel: the second allocation fails in private_candidate().
+	 */
+	dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
+	atxdmac->dma.dev				= &pdev->dev;
+	atxdmac->dma.device_alloc_chan_resources	= at_xdmac_alloc_chan_resources;
+	atxdmac->dma.device_free_chan_resources		= at_xdmac_free_chan_resources;
+	atxdmac->dma.device_tx_status			= at_xdmac_tx_status;
+	atxdmac->dma.device_issue_pending		= at_xdmac_issue_pending;
+	atxdmac->dma.device_prep_dma_cyclic		= at_xdmac_prep_dma_cyclic;
+	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
+	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
+	atxdmac->dma.device_control			= at_xdmac_control;
+	atxdmac->dma.device_slave_caps			= at_xdmac_device_slave_caps;
+
+	/* Disable all channels and interrupts. */
+	at_xdmac_off(atxdmac);
+
+	/* Init channels. */
+	INIT_LIST_HEAD(&atxdmac->dma.channels);
+	for (i = 0; i < nr_channels; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		atchan->chan.device = &atxdmac->dma;
+		list_add_tail(&atchan->chan.device_node,
+			      &atxdmac->dma.channels);
+
+		atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
+		atchan->mask = 1 << i;
+
+		spin_lock_init(&atchan->lock);
+		INIT_LIST_HEAD(&atchan->xfers_list);
+		INIT_LIST_HEAD(&atchan->free_descs_list);
+		tasklet_init(&atchan->tasklet, at_xdmac_tasklet,
+			     (unsigned long)atchan);
+
+		/* Clear pending interrupts. */
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+	platform_set_drvdata(pdev, atxdmac);
+
+	ret = dma_async_device_register(&atxdmac->dma);
+	if (ret) {
+		dev_err(&pdev->dev, "fail to register DMA engine device\n");
+		goto err_clk_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 at_xdmac_xlate, atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register of dma controller\n");
+		goto err_dma_unregister;
+	}
+
+	dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
+		 nr_channels, atxdmac->regs);
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&atxdmac->dma);
+err_clk_disable:
+	clk_disable_unprepare(atxdmac->clk);
+err_free_irq:
+	free_irq(atxdmac->irq, atxdmac);
+	return ret;
+}
+
+static int at_xdmac_remove(struct platform_device *pdev)
+{
+	struct at_xdmac	*atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+	int		i;
+
+	at_xdmac_off(atxdmac);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&atxdmac->dma);
+	clk_disable_unprepare(atxdmac->clk);
+
+	synchronize_irq(atxdmac->irq);
+
+	free_irq(atxdmac->irq, atxdmac);
+
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		tasklet_kill(&atchan->tasklet);
+		at_xdmac_free_chan_resources(&atchan->chan);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops atmel_xdmac_dev_pm_ops = {
+	.prepare	= atmel_xdmac_prepare,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
+};
+
+static const struct of_device_id atmel_xdmac_dt_ids[] = {
+	{
+		.compatible = "atmel,sama5d4-dma",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);
+
+static struct platform_driver at_xdmac_driver = {
+	.probe		= at_xdmac_probe,
+	.remove		= at_xdmac_remove,
+	.driver = {
+		.name		= "at_xdmac",
+		.owner		= THIS_MODULE,
+		.of_match_table	= of_match_ptr(atmel_xdmac_dt_ids),
+		.pm		= &atmel_xdmac_dev_pm_ops,
+	}
+};
+
+static int __init at_xdmac_init(void)
+{
+	return platform_driver_probe(&at_xdmac_driver, at_xdmac_probe);
+}
+subsys_initcall(at_xdmac_init);
+
+MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
+MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
+MODULE_LICENSE("GPL");

+ 0 - 3
drivers/dma/bcm2835-dma.c

@@ -525,8 +525,6 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id, int irq)
 	vchan_init(&c->vc, &d->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	d->ddev.chancnt++;
-
 	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
 	c->ch = chan_id;
 	c->irq_number = irq;
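This hunk, like the similar ones in the drivers below, relies on dma_async_device_register() computing chancnt itself by walking the device's channel list; a driver that also increments the field by hand ends up advertising twice as many channels as it has.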
@@ -694,7 +692,6 @@ static struct platform_driver bcm2835_dma_driver = {
 	.remove	= bcm2835_dma_remove,
 	.driver = {
 		.name = "bcm2835-dma",
-		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
 	},
 };

+ 8 - 4
drivers/dma/cppi41.c

@@ -1,3 +1,4 @@
+#include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
@@ -567,7 +568,7 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 		reg |= GCR_TEARDOWN;
 		cppi_writel(reg, c->gcr_reg);
 		c->td_queued = 1;
-		c->td_retry = 100;
+		c->td_retry = 500;
 	}
 
 	if (!c->td_seen || !c->td_desc_seen) {
@@ -603,12 +604,16 @@ static int cppi41_tear_down_chan(struct cppi41_channel *c)
 	 * descriptor before the TD we fetch it from enqueue, it has to be
 	 * there waiting for us.
 	 */
-	if (!c->td_seen && c->td_retry)
+	if (!c->td_seen && c->td_retry) {
+		udelay(1);
 		return -EAGAIN;
-
+	}
 	WARN_ON(!c->td_retry);
+
 	if (!c->td_desc_seen) {
 		desc_phys = cppi41_pop_desc(cdd, c->q_num);
+		if (!desc_phys)
+			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
 		WARN_ON(!desc_phys);
 	}
 
@@ -1088,7 +1093,6 @@ static struct platform_driver cpp41_dma_driver = {
 	.remove = cppi41_dma_remove,
 	.driver = {
 		.name = "cppi41-dma-engine",
-		.owner = THIS_MODULE,
 		.pm = &cppi41_pm_ops,
 		.of_match_table = of_match_ptr(cppi41_dma_ids),
 	},

+ 1 - 3
drivers/dma/dma-jz4740.c

@@ -563,10 +563,9 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 	dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
 	dd->device_control = jz4740_dma_control;
 	dd->dev = &pdev->dev;
-	dd->chancnt = JZ_DMA_NR_CHANS;
 	INIT_LIST_HEAD(&dd->channels);
 
-	for (i = 0; i < dd->chancnt; i++) {
+	for (i = 0; i < JZ_DMA_NR_CHANS; i++) {
 		chan = &dmadev->chan[i];
 		chan->id = i;
 		chan->vchan.desc_free = jz4740_dma_desc_free;
@@ -608,7 +607,6 @@ static struct platform_driver jz4740_dma_driver = {
 	.remove = jz4740_dma_remove,
 	.driver = {
 		.name = "jz4740-dma",
-		.owner = THIS_MODULE,
 	},
 };
 module_platform_driver(jz4740_dma_driver);

+ 1 - 2
drivers/dma/dmaengine.c

@@ -330,8 +330,7 @@ static int __init dma_channel_table_init(void)
 	if (err) {
 		pr_err("initialization failure\n");
 		for_each_dma_cap_mask(cap, dma_cap_mask_all)
-			if (channel_table[cap])
-				free_percpu(channel_table[cap]);
+			free_percpu(channel_table[cap]);
 	}
 
 	return err;
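The removed check was redundant: free_percpu(), like kfree(), is a no-op when passed a NULL pointer.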

+ 97 - 94
drivers/dma/fsl-edma.c

@@ -118,17 +118,17 @@
 				BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
 
 struct fsl_edma_hw_tcd {
-	u32	saddr;
-	u16	soff;
-	u16	attr;
-	u32	nbytes;
-	u32	slast;
-	u32	daddr;
-	u16	doff;
-	u16	citer;
-	u32	dlast_sga;
-	u16	csr;
-	u16	biter;
+	__le32	saddr;
+	__le16	soff;
+	__le16	attr;
+	__le32	nbytes;
+	__le32	slast;
+	__le32	daddr;
+	__le16	doff;
+	__le16	citer;
+	__le32	dlast_sga;
+	__le16	csr;
+	__le16	biter;
 };
 
 struct fsl_edma_sw_tcd {
@@ -175,18 +175,12 @@ struct fsl_edma_engine {
 };
 
 /*
- * R/W functions for big- or little-endian registers
- * the eDMA controller's endian is independent of the CPU core's endian.
+ * R/W functions for big- or little-endian registers:
+ * The eDMA controller's endianness is independent of the CPU core's.
+ * For the big-endian IP module, the offsets of 8-bit and 16-bit registers
+ * must also be swapped relative to their little-endian counterparts.
  */
 
-static u16 edma_readw(struct fsl_edma_engine *edma, void __iomem *addr)
-{
-	if (edma->big_endian)
-		return ioread16be(addr);
-	else
-		return ioread16(addr);
-}
-
 static u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
 {
 	if (edma->big_endian)
@@ -197,13 +191,18 @@ static u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
 
 static void edma_writeb(struct fsl_edma_engine *edma, u8 val, void __iomem *addr)
 {
-	iowrite8(val, addr);
+	/* swap the reg offset for these in big-endian mode */
+	if (edma->big_endian)
+		iowrite8(val, (void __iomem *)((unsigned long)addr ^ 0x3));
+	else
+		iowrite8(val, addr);
 }
 
 static void edma_writew(struct fsl_edma_engine *edma, u16 val, void __iomem *addr)
 {
+	/* swap the reg offset for these in big-endian mode */
 	if (edma->big_endian)
-		iowrite16be(val, addr);
+		iowrite16be(val, (void __iomem *)((unsigned long)addr ^ 0x2));
 	else
 		iowrite16(val, addr);
 }
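The XOR trick works because a big-endian 32-bit register bank puts narrow registers in the opposite byte lanes: an 8-bit register at byte offset 0 of a word in the little-endian layout sits at offset 3 (addr ^ 0x3) in the big-endian layout, and a 16-bit register at offset 0 sits at offset 2 (addr ^ 0x2).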
@@ -254,13 +253,12 @@ static void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
 	chans_per_mux = fsl_chan->edma->n_chans / DMAMUX_NR;
 	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
 	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
+	slot = EDMAMUX_CHCFG_SOURCE(slot);
 
 	if (enable)
-		edma_writeb(fsl_chan->edma,
-				EDMAMUX_CHCFG_ENBL | EDMAMUX_CHCFG_SOURCE(slot),
-				muxaddr + ch_off);
+		iowrite8(EDMAMUX_CHCFG_ENBL | slot, muxaddr + ch_off);
 	else
-		edma_writeb(fsl_chan->edma, EDMAMUX_CHCFG_DIS, muxaddr + ch_off);
+		iowrite8(EDMAMUX_CHCFG_DIS, muxaddr + ch_off);
 }
 
 static unsigned int fsl_edma_get_tcd_attr(enum dma_slave_buswidth addr_width)
@@ -286,9 +284,8 @@ static void fsl_edma_free_desc(struct virt_dma_desc *vdesc)
 
 	fsl_desc = to_fsl_edma_desc(vdesc);
 	for (i = 0; i < fsl_desc->n_tcds; i++)
-			dma_pool_free(fsl_desc->echan->tcd_pool,
-					fsl_desc->tcd[i].vtcd,
-					fsl_desc->tcd[i].ptcd);
+		dma_pool_free(fsl_desc->echan->tcd_pool, fsl_desc->tcd[i].vtcd,
+			      fsl_desc->tcd[i].ptcd);
 	kfree(fsl_desc);
 }
 
@@ -363,8 +360,8 @@ static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan,
 
 	/* calculate the total size in this desc */
 	for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++)
-		len += edma_readl(fsl_chan->edma, &(edesc->tcd[i].vtcd->nbytes))
-			* edma_readw(fsl_chan->edma, &(edesc->tcd[i].vtcd->biter));
+		len += le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
 
 	if (!in_progress)
 		return len;
@@ -376,17 +373,15 @@ static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan,
 
 	/* figure out the finished and calculate the residue */
 	for (i = 0; i < fsl_chan->edesc->n_tcds; i++) {
-		size = edma_readl(fsl_chan->edma, &(edesc->tcd[i].vtcd->nbytes))
-			* edma_readw(fsl_chan->edma, &(edesc->tcd[i].vtcd->biter));
+		size = le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
 		if (dir == DMA_MEM_TO_DEV)
-			dma_addr = edma_readl(fsl_chan->edma,
-					&(edesc->tcd[i].vtcd->saddr));
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr);
 		else
-			dma_addr = edma_readl(fsl_chan->edma,
-					&(edesc->tcd[i].vtcd->daddr));
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr);
 
 		len -= size;
-		if (cur_addr > dma_addr && cur_addr < dma_addr + size) {
+		if (cur_addr >= dma_addr && cur_addr < dma_addr + size) {
 			len += dma_addr + size - cur_addr;
 			break;
 		}
@@ -424,55 +419,67 @@ static enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
 	return fsl_chan->status;
 }
 
-static void fsl_edma_set_tcd_params(struct fsl_edma_chan *fsl_chan,
-		u32 src, u32 dst, u16 attr, u16 soff, u32 nbytes,
-		u32 slast, u16 citer, u16 biter, u32 doff, u32 dlast_sga,
-		u16 csr)
+static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
+				  struct fsl_edma_hw_tcd *tcd)
 {
+	struct fsl_edma_engine *edma = fsl_chan->edma;
 	void __iomem *addr = fsl_chan->edma->membase;
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 
 	/*
-	 * TCD parameters have been swapped in fill_tcd_params(),
-	 * so just write them to registers in the cpu endian here
+	 * TCD parameters are stored in struct fsl_edma_hw_tcd in little
+	 * endian format. However, the TCD registers must be written in
+	 * big- or little-endian form, matching the endianness of the eDMA
+	 * engine model.
 	 */
-	writew(0, addr + EDMA_TCD_CSR(ch));
-	writel(src, addr + EDMA_TCD_SADDR(ch));
-	writel(dst, addr + EDMA_TCD_DADDR(ch));
-	writew(attr, addr + EDMA_TCD_ATTR(ch));
-	writew(soff, addr + EDMA_TCD_SOFF(ch));
-	writel(nbytes, addr + EDMA_TCD_NBYTES(ch));
-	writel(slast, addr + EDMA_TCD_SLAST(ch));
-	writew(citer, addr + EDMA_TCD_CITER(ch));
-	writew(biter, addr + EDMA_TCD_BITER(ch));
-	writew(doff, addr + EDMA_TCD_DOFF(ch));
-	writel(dlast_sga, addr + EDMA_TCD_DLAST_SGA(ch));
-	writew(csr, addr + EDMA_TCD_CSR(ch));
-}
-
-static void fill_tcd_params(struct fsl_edma_engine *edma,
-		struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
-		u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
-		u16 biter, u16 doff, u32 dlast_sga, bool major_int,
-		bool disable_req, bool enable_sg)
+	edma_writew(edma, 0, addr + EDMA_TCD_CSR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->saddr), addr + EDMA_TCD_SADDR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->daddr), addr + EDMA_TCD_DADDR(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->attr), addr + EDMA_TCD_ATTR(ch));
+	edma_writew(edma, le16_to_cpu(tcd->soff), addr + EDMA_TCD_SOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->nbytes), addr + EDMA_TCD_NBYTES(ch));
+	edma_writel(edma, le32_to_cpu(tcd->slast), addr + EDMA_TCD_SLAST(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->citer), addr + EDMA_TCD_CITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->biter), addr + EDMA_TCD_BITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->doff), addr + EDMA_TCD_DOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->dlast_sga), addr + EDMA_TCD_DLAST_SGA(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->csr), addr + EDMA_TCD_CSR(ch));
+}
+
+static inline
+void fsl_edma_fill_tcd(struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
+		       u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
+		       u16 biter, u16 doff, u32 dlast_sga, bool major_int,
+		       bool disable_req, bool enable_sg)
 {
 	u16 csr = 0;
 
 	/*
-	 * eDMA hardware SGs require the TCD parameters stored in memory
-	 * the same endian as the eDMA module so that they can be loaded
-	 * automatically by the engine
+	 * eDMA hardware SGs require the TCDs to be stored in little
+	 * endian format irrespective of the register endian model.
+	 * So we store the values in little endian in memory and let
+	 * fsl_edma_set_tcd_regs() do the swap when writing the registers.
 	 */
-	edma_writel(edma, src, &(tcd->saddr));
-	edma_writel(edma, dst, &(tcd->daddr));
-	edma_writew(edma, attr, &(tcd->attr));
-	edma_writew(edma, EDMA_TCD_SOFF_SOFF(soff), &(tcd->soff));
-	edma_writel(edma, EDMA_TCD_NBYTES_NBYTES(nbytes), &(tcd->nbytes));
-	edma_writel(edma, EDMA_TCD_SLAST_SLAST(slast), &(tcd->slast));
-	edma_writew(edma, EDMA_TCD_CITER_CITER(citer), &(tcd->citer));
-	edma_writew(edma, EDMA_TCD_DOFF_DOFF(doff), &(tcd->doff));
-	edma_writel(edma, EDMA_TCD_DLAST_SGA_DLAST_SGA(dlast_sga), &(tcd->dlast_sga));
-	edma_writew(edma, EDMA_TCD_BITER_BITER(biter), &(tcd->biter));
+	tcd->saddr = cpu_to_le32(src);
+	tcd->daddr = cpu_to_le32(dst);
+
+	tcd->attr = cpu_to_le16(attr);
+
+	tcd->soff = cpu_to_le16(EDMA_TCD_SOFF_SOFF(soff));
+
+	tcd->nbytes = cpu_to_le32(EDMA_TCD_NBYTES_NBYTES(nbytes));
+	tcd->slast = cpu_to_le32(EDMA_TCD_SLAST_SLAST(slast));
+
+	tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer));
+	tcd->doff = cpu_to_le16(EDMA_TCD_DOFF_DOFF(doff));
+
+	tcd->dlast_sga = cpu_to_le32(EDMA_TCD_DLAST_SGA_DLAST_SGA(dlast_sga));
+
+	tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter));
 	if (major_int)
 		csr |= EDMA_TCD_CSR_INT_MAJOR;
 
@@ -482,7 +489,7 @@ static void fill_tcd_params(struct fsl_edma_engine *edma,
 	if (enable_sg)
 		csr |= EDMA_TCD_CSR_E_SG;
 
-	edma_writew(edma, csr, &(tcd->csr));
+	tcd->csr = cpu_to_le16(csr);
 }
 
 static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
@@ -558,9 +565,9 @@ static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
 			doff = fsl_chan->fsc.addr_width;
 		}
 
-		fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd, src_addr,
-				dst_addr, fsl_chan->fsc.attr, soff, nbytes, 0,
-				iter, iter, doff, last_sg, true, false, true);
+		fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr, dst_addr,
+				  fsl_chan->fsc.attr, soff, nbytes, 0, iter,
+				  iter, doff, last_sg, true, false, true);
 		dma_buf_next += period_len;
 	}
 
@@ -607,16 +614,16 @@ static struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
 		iter = sg_dma_len(sg) / nbytes;
 		if (i < sg_len - 1) {
 			last_sg = fsl_desc->tcd[(i + 1)].ptcd;
-			fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd,
-					src_addr, dst_addr, fsl_chan->fsc.attr,
-					soff, nbytes, 0, iter, iter, doff, last_sg,
-					false, false, true);
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  false, false, true);
 		} else {
 			last_sg = 0;
-			fill_tcd_params(fsl_chan->edma, fsl_desc->tcd[i].vtcd,
-					src_addr, dst_addr, fsl_chan->fsc.attr,
-					soff, nbytes, 0, iter, iter, doff, last_sg,
-					true, true, false);
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  true, true, false);
 		}
 	}
 
@@ -625,17 +632,13 @@ static struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
 
 static void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan)
 {
-	struct fsl_edma_hw_tcd *tcd;
 	struct virt_dma_desc *vdesc;
 
 	vdesc = vchan_next_desc(&fsl_chan->vchan);
 	if (!vdesc)
 		return;
 	fsl_chan->edesc = to_fsl_edma_desc(vdesc);
-	tcd = fsl_chan->edesc->tcd[0].vtcd;
-	fsl_edma_set_tcd_params(fsl_chan, tcd->saddr, tcd->daddr, tcd->attr,
-			tcd->soff, tcd->nbytes, tcd->slast, tcd->citer,
-			tcd->biter, tcd->doff, tcd->dlast_sga, tcd->csr);
+	fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
 	fsl_edma_enable_request(fsl_chan);
 	fsl_chan->status = DMA_IN_PROGRESS;
 }

+ 0 - 1
drivers/dma/fsldma.c

@@ -1337,7 +1337,6 @@ static int fsl_dma_chan_probe(struct fsldma_device *fdev,
 
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&chan->common.device_node, &fdev->common.channels);
-	fdev->common.chancnt++;
 
 	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
 		 chan->irq != NO_IRQ ? chan->irq : fdev->irq);

+ 4 - 2
drivers/dma/imx-sdma.c

@@ -729,6 +729,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac,
 	case IMX_DMATYPE_CSPI:
 	case IMX_DMATYPE_EXT:
 	case IMX_DMATYPE_SSI:
+	case IMX_DMATYPE_SAI:
 		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
 		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
 		break;
@@ -1287,7 +1288,8 @@ static void sdma_load_firmware(const struct firmware *fw, void *context)
 	unsigned short *ram_code;
 
 	if (!fw) {
-		dev_err(sdma->dev, "firmware not found\n");
+		dev_info(sdma->dev, "external firmware not found, using ROM firmware\n");
+		/* In this case we just use the ROM firmware. */
 		return;
 	}
 
@@ -1346,7 +1348,7 @@ static int sdma_get_firmware(struct sdma_engine *sdma,
 	return ret;
 }
 
-static int __init sdma_init(struct sdma_engine *sdma)
+static int sdma_init(struct sdma_engine *sdma)
 {
 	int i, ret;
 	dma_addr_t ccb_phys;

+ 29 - 6
drivers/dma/ioat/dma_v3.c

@@ -1265,9 +1265,17 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 	op = IOAT_OP_XOR;
 
 	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dest_dma))
+		goto dma_unmap;
+
 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
 				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
 				      DMA_PREP_INTERRUPT);
@@ -1298,7 +1306,6 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 		goto dma_unmap;
 	}
 
-	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
 		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
 
@@ -1313,6 +1320,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 	}
 	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
 
+	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
 	/* skip validate if the capability is not present */
 	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
 		goto free_resources;
@@ -1327,8 +1336,13 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 	xor_val_result = 1;
 
 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 					  &xor_val_result, DMA_PREP_INTERRUPT);
@@ -1374,8 +1388,13 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 
 	xor_val_result = 0;
 	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = DMA_ERROR_CODE;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
 		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
 					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i]))
+			goto dma_unmap;
+	}
 	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
 					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
 					  &xor_val_result, DMA_PREP_INTERRUPT);
@@ -1417,14 +1436,18 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device)
 	goto free_resources;
 dma_unmap:
 	if (op == IOAT_OP_XOR) {
-		dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+		if (dest_dma != DMA_ERROR_CODE)
+			dma_unmap_page(dev, dest_dma, PAGE_SIZE,
+				       DMA_FROM_DEVICE);
 		for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
-			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-				       DMA_TO_DEVICE);
+			if (dma_srcs[i] != DMA_ERROR_CODE)
+				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+					       DMA_TO_DEVICE);
 	} else if (op == IOAT_OP_XOR_VAL) {
 		for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
-			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
-				       DMA_TO_DEVICE);
+			if (dma_srcs[i] != DMA_ERROR_CODE)
+				dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+					       DMA_TO_DEVICE);
 	}
 free_resources:
 	dma->device_free_chan_resources(dma_chan);
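The fix enforces the standard DMA-API contract: check every mapping with dma_mapping_error() before the address reaches hardware, and unmap only addresses that mapped successfully. A generic sketch of the pattern, not specific to ioat:

	dma_addr_t addr;

	addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, addr))
		return -ENOMEM;		/* never program a failed mapping */

	/* ... hardware uses addr ... */

	dma_unmap_page(dev, addr, PAGE_SIZE, DMA_TO_DEVICE);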

+ 0 - 1
drivers/dma/iop-adma.c

@@ -1557,7 +1557,6 @@ static struct platform_driver iop_adma_driver = {
 	.probe		= iop_adma_probe,
 	.remove		= iop_adma_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "iop-adma",
 	},
 };

+ 2 - 2
drivers/dma/k3dma.c

@@ -722,7 +722,6 @@ static int k3_dma_probe(struct platform_device *op)
 	d->slave.device_issue_pending = k3_dma_issue_pending;
 	d->slave.device_control = k3_dma_control;
 	d->slave.copy_align = DMA_ALIGN;
-	d->slave.chancnt = d->dma_requests;
 
 	/* init virtual channel */
 	d->chans = devm_kzalloc(&op->dev,
@@ -787,6 +786,7 @@ static int k3_dma_remove(struct platform_device *op)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int k3_dma_suspend(struct device *dev)
 {
 	struct k3_dma_dev *d = dev_get_drvdata(dev);
@@ -816,13 +816,13 @@ static int k3_dma_resume(struct device *dev)
 	k3_dma_enable_dma(d, true);
 	return 0;
 }
+#endif
 
 static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend, k3_dma_resume);
 
 static struct platform_driver k3_pdma_driver = {
 	.driver		= {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 		.pm	= &k3_dma_pmops,
 		.of_match_table = k3_pdma_dt_ids,
 	},

+ 0 - 1
drivers/dma/mmp_pdma.c

@@ -1098,7 +1098,6 @@ static const struct platform_device_id mmp_pdma_id_table[] = {
 static struct platform_driver mmp_pdma_driver = {
 	.driver		= {
 		.name	= "mmp-pdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_pdma_dt_ids,
 	},
 	.id_table	= mmp_pdma_id_table,

+ 0 - 1
drivers/dma/mmp_tdma.c

@@ -703,7 +703,6 @@ static const struct platform_device_id mmp_tdma_id_table[] = {
 static struct platform_driver mmp_tdma_driver = {
 	.driver		= {
 		.name	= "mmp-tdma",
-		.owner  = THIS_MODULE,
 		.of_match_table = mmp_tdma_dt_ids,
 	},
 	.id_table	= mmp_tdma_id_table,

+ 7 - 6
drivers/dma/mpc512x_dma.c

@@ -885,6 +885,7 @@ static int mpc_dma_probe(struct platform_device *op)
 	struct resource res;
 	ulong regs_start, regs_size;
 	int retval, i;
+	u8 chancnt;
 
 	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
 	if (!mdma) {
@@ -956,10 +957,6 @@ static int mpc_dma_probe(struct platform_device *op)
 
 	dma = &mdma->dma;
 	dma->dev = dev;
-	if (mdma->is_mpc8308)
-		dma->chancnt = MPC8308_DMACHAN_MAX;
-	else
-		dma->chancnt = MPC512x_DMACHAN_MAX;
 	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
 	dma->device_issue_pending = mpc_dma_issue_pending;
@@ -972,7 +969,12 @@ static int mpc_dma_probe(struct platform_device *op)
 	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
 	dma_cap_set(DMA_SLAVE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	if (mdma->is_mpc8308)
+		chancnt = MPC8308_DMACHAN_MAX;
+	else
+		chancnt = MPC512x_DMACHAN_MAX;
+
+	for (i = 0; i < chancnt; i++) {
 		mchan = &mdma->channels[i];
 
 		mchan->chan.device = dma;
@@ -1090,7 +1092,6 @@ static struct platform_driver mpc_dma_driver = {
 	.remove		= mpc_dma_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table	= mpc_dma_match,
 	},
 };

+ 0 - 1
drivers/dma/nbpfaxi.c

@@ -1500,7 +1500,6 @@ static const struct dev_pm_ops nbpf_pm_ops = {
 
 static struct platform_driver nbpf_driver = {
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "dma-nbpf",
 		.of_match_table = nbpf_match,
 		.pm = &nbpf_pm_ops,

+ 0 - 2
drivers/dma/omap-dma.c

@@ -1074,8 +1074,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig)
 	vchan_init(&c->vc, &od->ddev);
 	INIT_LIST_HEAD(&c->node);
 
-	od->ddev.chancnt++;
-
 	return 0;
 }
 

+ 1 - 1
drivers/dma/pch_dma.c

@@ -997,7 +997,7 @@ static void pch_dma_remove(struct pci_dev *pdev)
 #define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
 #define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
 
-const struct pci_device_id pch_dma_id_table[] = {
+static const struct pci_device_id pch_dma_id_table[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
 	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */

+ 4 - 3
drivers/dma/pl330.c

@@ -2619,6 +2619,9 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		return -ENOMEM;
 	}
 
+	pd = &pl330->ddma;
+	pd->dev = &adev->dev;
+
 	pl330->mcbufsz = pdat ? pdat->mcbuf_sz : 0;
 
 	res = &adev->res;
@@ -2655,7 +2658,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	if (!add_desc(pl330, GFP_KERNEL, NR_DEFAULT_DESC))
 		dev_warn(&adev->dev, "unable to allocate desc\n");
 
-	pd = &pl330->ddma;
 	INIT_LIST_HEAD(&pd->channels);
 
 	/* Initialize channel parameters */
@@ -2692,7 +2694,6 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 		list_add_tail(&pch->chan.device_node, &pd->channels);
 	}
 
-	pd->dev = &adev->dev;
 	if (pdat) {
 		pd->cap_mask = pdat->cap_mask;
 	} else {
@@ -2819,6 +2820,6 @@ static struct amba_driver pl330_driver = {
 
 module_amba_driver(pl330_driver);
 
-MODULE_AUTHOR("Jaswinder Singh <jassi.brar@samsung.com>");
+MODULE_AUTHOR("Jaswinder Singh <jassisinghbrar@gmail.com>");
 MODULE_DESCRIPTION("API Driver for PL330 DMAC");
 MODULE_LICENSE("GPL");

+ 161 - 70
drivers/dma/qcom_bam_dma.c

@@ -79,35 +79,97 @@ struct bam_async_desc {
 	struct bam_desc_hw desc[0];
 };
 
-#define BAM_CTRL			0x0000
-#define BAM_REVISION			0x0004
-#define BAM_SW_REVISION			0x0080
-#define BAM_NUM_PIPES			0x003C
-#define BAM_TIMER			0x0040
-#define BAM_TIMER_CTRL			0x0044
-#define BAM_DESC_CNT_TRSHLD		0x0008
-#define BAM_IRQ_SRCS			0x000C
-#define BAM_IRQ_SRCS_MSK		0x0010
-#define BAM_IRQ_SRCS_UNMASKED		0x0030
-#define BAM_IRQ_STTS			0x0014
-#define BAM_IRQ_CLR			0x0018
-#define BAM_IRQ_EN			0x001C
-#define BAM_CNFG_BITS			0x007C
-#define BAM_IRQ_SRCS_EE(ee)		(0x0800 + ((ee) * 0x80))
-#define BAM_IRQ_SRCS_MSK_EE(ee)		(0x0804 + ((ee) * 0x80))
-#define BAM_P_CTRL(pipe)		(0x1000 + ((pipe) * 0x1000))
-#define BAM_P_RST(pipe)			(0x1004 + ((pipe) * 0x1000))
-#define BAM_P_HALT(pipe)		(0x1008 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_STTS(pipe)		(0x1010 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_CLR(pipe)		(0x1014 + ((pipe) * 0x1000))
-#define BAM_P_IRQ_EN(pipe)		(0x1018 + ((pipe) * 0x1000))
-#define BAM_P_EVNT_DEST_ADDR(pipe)	(0x182C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_REG(pipe)		(0x1818 + ((pipe) * 0x1000))
-#define BAM_P_SW_OFSTS(pipe)		(0x1800 + ((pipe) * 0x1000))
-#define BAM_P_DATA_FIFO_ADDR(pipe)	(0x1824 + ((pipe) * 0x1000))
-#define BAM_P_DESC_FIFO_ADDR(pipe)	(0x181C + ((pipe) * 0x1000))
-#define BAM_P_EVNT_TRSHLD(pipe)		(0x1828 + ((pipe) * 0x1000))
-#define BAM_P_FIFO_SIZES(pipe)		(0x1820 + ((pipe) * 0x1000))
+enum bam_reg {
+	BAM_CTRL,
+	BAM_REVISION,
+	BAM_NUM_PIPES,
+	BAM_DESC_CNT_TRSHLD,
+	BAM_IRQ_SRCS,
+	BAM_IRQ_SRCS_MSK,
+	BAM_IRQ_SRCS_UNMASKED,
+	BAM_IRQ_STTS,
+	BAM_IRQ_CLR,
+	BAM_IRQ_EN,
+	BAM_CNFG_BITS,
+	BAM_IRQ_SRCS_EE,
+	BAM_IRQ_SRCS_MSK_EE,
+	BAM_P_CTRL,
+	BAM_P_RST,
+	BAM_P_HALT,
+	BAM_P_IRQ_STTS,
+	BAM_P_IRQ_CLR,
+	BAM_P_IRQ_EN,
+	BAM_P_EVNT_DEST_ADDR,
+	BAM_P_EVNT_REG,
+	BAM_P_SW_OFSTS,
+	BAM_P_DATA_FIFO_ADDR,
+	BAM_P_DESC_FIFO_ADDR,
+	BAM_P_EVNT_GEN_TRSHLD,
+	BAM_P_FIFO_SIZES,
+};
+
+struct reg_offset_data {
+	u32 base_offset;
+	unsigned int pipe_mult, evnt_mult, ee_mult;
+};
+
+static const struct reg_offset_data bam_v1_3_reg_info[] = {
+	[BAM_CTRL]		= { 0x0F80, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0F84, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x0FBC, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0F88, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x0F8C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0F90, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0FB0, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0F94, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0F98, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x0F9C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x0FFC, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x1800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x1804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x0000, 0x80, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x0004, 0x80, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x0008, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x0010, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x0014, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x0018, 0x80, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x40, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x40, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1024, 0x00, 0x40, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x101C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1028, 0x00, 0x40, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1020, 0x00, 0x40, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_4_reg_info[] = {
+	[BAM_CTRL]		= { 0x0000, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0004, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x003C, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0008, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x000C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0010, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0030, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x001C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x007C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x0800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x0804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x1000, 0x1000, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x1004, 0x1000, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x1008, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x1010, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x1014, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x1018, 0x1000, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x1000, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x1000, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1824, 0x00, 0x1000, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x181C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1828, 0x00, 0x1000, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1820, 0x00, 0x1000, 0x00 },
+};
 
 /* BAM CTRL */
 #define BAM_SW_RST			BIT(0)
@@ -297,6 +359,8 @@ struct bam_device {
 	/* execution environment ID, from DT */
 	u32 ee;
 
+	const struct reg_offset_data *layout;
+
 	struct clk *bamclk;
 	int irq;
 
@@ -304,6 +368,23 @@ struct bam_device {
 	struct tasklet_struct task;
 };
 
+/**
+ * bam_addr - returns BAM register address
+ * @bdev: bam device
+ * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+ * @reg:  register enum
+ */
+static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+		enum bam_reg reg)
+{
+	const struct reg_offset_data r = bdev->layout[reg];
+
+	return bdev->regs + r.base_offset +
+		r.pipe_mult * pipe +
+		r.evnt_mult * pipe +
+		r.ee_mult * bdev->ee;
+}
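For example, with the v1.4 layout BAM_P_RST for pipe 2 resolves to regs + 0x1004 + 2 * 0x1000 = regs + 0x3004, while the v1.3 layout places the same register at regs + 0x0004 + 2 * 0x80 = regs + 0x0104.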
+
 /**
  * bam_reset_channel - Reset individual BAM DMA channel
  * @bchan: bam channel
@@ -317,8 +398,8 @@ static void bam_reset_channel(struct bam_chan *bchan)
 	lockdep_assert_held(&bchan->vc.lock);
 
 	/* reset channel */
-	writel_relaxed(1, bdev->regs + BAM_P_RST(bchan->id));
-	writel_relaxed(0, bdev->regs + BAM_P_RST(bchan->id));
+	writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
 
 	/* don't allow cpu to reorder BAM register accesses done after this */
 	wmb();
@@ -347,17 +428,18 @@ static void bam_chan_init_hw(struct bam_chan *bchan,
 	 * because we allocated 1 more descriptor (8 bytes) than we can use
 	 */
 	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
-			bdev->regs + BAM_P_DESC_FIFO_ADDR(bchan->id));
-	writel_relaxed(BAM_DESC_FIFO_SIZE, bdev->regs +
-			BAM_P_FIFO_SIZES(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+	writel_relaxed(BAM_DESC_FIFO_SIZE,
+			bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
 
 	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
-	writel_relaxed(P_DEFAULT_IRQS_EN, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(P_DEFAULT_IRQS_EN,
+			bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 
 	/* unmask the specific pipe and EE combo */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val |= BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* don't allow cpu to reorder the channel enable done below */
 	wmb();
@@ -367,7 +449,7 @@ static void bam_chan_init_hw(struct bam_chan *bchan,
 	if (dir == DMA_DEV_TO_MEM)
 		val |= P_DIRECTION;
 
-	writel_relaxed(val, bdev->regs + BAM_P_CTRL(bchan->id));
+	writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
 
 	bchan->initialized = 1;
 
@@ -432,12 +514,12 @@ static void bam_free_chan(struct dma_chan *chan)
 	bchan->fifo_virt = NULL;
 
 	/* mask irq for pipe/channel */
-	val = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 	val &= ~BIT(bchan->id);
-	writel_relaxed(val, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	/* disable irq */
-	writel_relaxed(0, bdev->regs + BAM_P_IRQ_EN(bchan->id));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
 }
 
 /**
@@ -583,14 +665,14 @@ static int bam_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	switch (cmd) {
 	case DMA_PAUSE:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(1, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 1;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
 
 	case DMA_RESUME:
 		spin_lock_irqsave(&bchan->vc.lock, flag);
-		writel_relaxed(0, bdev->regs + BAM_P_HALT(bchan->id));
+		writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT));
 		bchan->paused = 0;
 		spin_unlock_irqrestore(&bchan->vc.lock, flag);
 		break;
@@ -626,7 +708,7 @@ static u32 process_channel_irqs(struct bam_device *bdev)
 	unsigned long flags;
 	struct bam_async_desc *async_desc;
 
-	srcs = readl_relaxed(bdev->regs + BAM_IRQ_SRCS_EE(bdev->ee));
+	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
 
 	/* return early if no pipe/channel interrupts are present */
 	if (!(srcs & P_IRQ))
@@ -639,11 +721,9 @@ static u32 process_channel_irqs(struct bam_device *bdev)
 			continue;
 
 		/* clear pipe irq */
-		pipe_stts = readl_relaxed(bdev->regs +
-			BAM_P_IRQ_STTS(i));
+		pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
 
-		writel_relaxed(pipe_stts, bdev->regs +
-				BAM_P_IRQ_CLR(i));
+		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
 
 		spin_lock_irqsave(&bchan->vc.lock, flags);
 		async_desc = bchan->curr_txd;
@@ -694,12 +774,12 @@ static irqreturn_t bam_dma_irq(int irq, void *data)
 		tasklet_schedule(&bdev->task);
 
 	if (srcs & BAM_IRQ)
-		clr_mask = readl_relaxed(bdev->regs + BAM_IRQ_STTS);
+		clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
 
 	/* don't allow reorder of the various accesses to the BAM registers */
 	mb();
 
-	writel_relaxed(clr_mask, bdev->regs + BAM_IRQ_CLR);
+	writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
 
 	return IRQ_HANDLED;
 }
@@ -763,7 +843,7 @@ static void bam_apply_new_config(struct bam_chan *bchan,
 	else
 		maxburst = bchan->slave.dst_maxburst;
 
-	writel_relaxed(maxburst, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(maxburst, bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	bchan->reconfigure = 0;
 }
@@ -830,7 +910,7 @@ static void bam_start_dma(struct bam_chan *bchan)
 	/* ensure descriptor writes and dma start not reordered */
 	wmb();
 	writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
-			bdev->regs + BAM_P_EVNT_REG(bchan->id));
+			bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
 }
 
 /**
@@ -918,43 +998,44 @@ static int bam_init(struct bam_device *bdev)
 	u32 val;
 
 	/* read revision and configuration information */
-	val = readl_relaxed(bdev->regs + BAM_REVISION) >> NUM_EES_SHIFT;
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION)) >> NUM_EES_SHIFT;
 	val &= NUM_EES_MASK;
 
 	/* check that configured EE is within range */
 	if (bdev->ee >= val)
 		return -EINVAL;
 
-	val = readl_relaxed(bdev->regs + BAM_NUM_PIPES);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
 	bdev->num_channels = val & BAM_NUM_PIPES_MASK;
 
 	/* s/w reset bam */
 	/* after reset all pipes are disabled and idle */
-	val = readl_relaxed(bdev->regs + BAM_CTRL);
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
 	val |= BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 	val &= ~BAM_SW_RST;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* make sure previous stores are visible before enabling BAM */
 	wmb();
 
 	/* enable bam */
 	val |= BAM_EN;
-	writel_relaxed(val, bdev->regs + BAM_CTRL);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
 
 	/* set descriptor threshold, start with 4 bytes */
-	writel_relaxed(DEFAULT_CNT_THRSHLD, bdev->regs + BAM_DESC_CNT_TRSHLD);
+	writel_relaxed(DEFAULT_CNT_THRSHLD,
+			bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
 
 	/* Enable default set of h/w workarounds, ie all except BAM_FULL_PIPE */
-	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bdev->regs + BAM_CNFG_BITS);
+	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
 
 	/* enable irqs for errors */
 	writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
-				bdev->regs + BAM_IRQ_EN);
+			bam_addr(bdev, 0, BAM_IRQ_EN));
 
 	/* unmask global bam interrupt */
-	writel_relaxed(BAM_IRQ_MSK, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	return 0;
 }
@@ -969,9 +1050,18 @@ static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
 	bchan->vc.desc_free = bam_dma_free_desc;
 }
 
+static const struct of_device_id bam_of_match[] = {
+	{ .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+	{ .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, bam_of_match);
+
 static int bam_dma_probe(struct platform_device *pdev)
 {
 	struct bam_device *bdev;
+	const struct of_device_id *match;
 	struct resource *iores;
 	int ret, i;
 
@@ -981,6 +1071,14 @@ static int bam_dma_probe(struct platform_device *pdev)
 
 	bdev->dev = &pdev->dev;
 
+	match = of_match_node(bam_of_match, pdev->dev.of_node);
+	if (!match) {
+		dev_err(&pdev->dev, "Unsupported BAM module\n");
+		return -ENODEV;
+	}
+
+	bdev->layout = match->data;
+
 	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
 	if (IS_ERR(bdev->regs))
@@ -1084,7 +1182,7 @@ static int bam_dma_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&bdev->common);
 
 	/* mask all interrupts for this execution environment */
-	writel_relaxed(0, bdev->regs + BAM_IRQ_SRCS_MSK_EE(bdev->ee));
+	writel_relaxed(0, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
 
 	devm_free_irq(bdev->dev, bdev->irq, bdev);
 
@@ -1104,18 +1202,11 @@ static int bam_dma_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct of_device_id bam_of_match[] = {
-	{ .compatible = "qcom,bam-v1.4.0", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, bam_of_match);
-
 static struct platform_driver bam_dma_driver = {
 	.probe = bam_dma_probe,
 	.remove = bam_dma_remove,
 	.driver = {
 		.name = "bam-dma-engine",
-		.owner = THIS_MODULE,
 		.of_match_table = bam_of_match,
 	},
 };
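These hunks replace every open-coded bdev->regs + REG computation with a bam_addr(bdev, pipe, reg) lookup, so that the per-revision register layouts referenced from the new bam_of_match table (bam_v1_3_reg_info, bam_v1_4_reg_info) can be selected at probe time via bdev->layout = match->data. The helper itself is outside the quoted hunks; a minimal sketch of how such a lookup could work, assuming a layout table indexed by a register enum and carrying per-pipe and per-EE strides (field names and the bam_reg enum are assumptions):

struct reg_offset_data {
	u32 base_offset;		/* offset of the register block */
	unsigned int pipe_mult;		/* stride per pipe/channel */
	unsigned int ee_mult;		/* stride per execution environment */
};

/* sketch only: resolves a (pipe, register) pair against bdev->layout */
static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
				     enum bam_reg reg)
{
	const struct reg_offset_data r = bdev->layout[reg];

	return bdev->regs + r.base_offset +
		r.pipe_mult * pipe +
		r.ee_mult * bdev->ee;
}

This is why the callers now pass an explicit pipe argument: 0 for global registers, bchan->id for per-pipe ones such as BAM_P_EVNT_REG.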

+ 0 - 1
drivers/dma/s3c24xx-dma.c

@@ -1402,7 +1402,6 @@ static int s3c24xx_dma_remove(struct platform_device *pdev)
 static struct platform_driver s3c24xx_dma_driver = {
 	.driver		= {
 		.name	= "s3c24xx-dma",
-		.owner	= THIS_MODULE,
 	},
 	.id_table	= s3c24xx_dma_driver_ids,
 	.probe		= s3c24xx_dma_probe,

+ 1 - 2
drivers/dma/sa11x0-dma.c

@@ -829,7 +829,6 @@ static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
 {
 	unsigned i;
 
-	dmadev->chancnt = ARRAY_SIZE(chan_desc);
 	INIT_LIST_HEAD(&dmadev->channels);
 	dmadev->dev = dev;
 	dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
@@ -838,7 +837,7 @@ static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
 	dmadev->device_tx_status = sa11x0_dma_tx_status;
 	dmadev->device_issue_pending = sa11x0_dma_issue_pending;
 
-	for (i = 0; i < dmadev->chancnt; i++) {
+	for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
 		struct sa11x0_dma_chan *c;
 
 		c = kzalloc(sizeof(*c), GFP_KERNEL);
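The same cleanup recurs in several drivers below: stop assigning dma_device.chancnt by hand and iterate over the driver's own channel table instead. dma_async_device_register() walks the channels linked into the device's channel list and sets chancnt itself, so the manual assignments were redundant. A minimal sketch of the registration pattern, as a probe-time fragment with a hypothetical example_chan wrapper standing in for the driver's channel type:

/* the core derives chancnt from this list at registration time */
INIT_LIST_HEAD(&dmadev->channels);

for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
	struct example_chan *c = kzalloc(sizeof(*c), GFP_KERNEL);

	if (!c)
		return -ENOMEM;
	c->chan.device = dmadev;
	list_add_tail(&c->chan.device_node, &dmadev->channels);
}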

+ 0 - 3
drivers/dma/sh/rcar-audmapp.c

@@ -253,7 +253,6 @@ static int audmapp_chan_probe(struct platform_device *pdev,
 
 static void audmapp_chan_remove(struct audmapp_device *audev)
 {
-	struct dma_device *dma_dev = &audev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -261,7 +260,6 @@ static void audmapp_chan_remove(struct audmapp_device *audev)
 		BUG_ON(!schan);
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
@@ -367,7 +365,6 @@ static struct platform_driver audmapp_driver = {
 	.probe		= audmapp_probe,
 	.remove		= audmapp_remove,
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "rcar-audmapp-engine",
 		.of_match_table = audmapp_of_match,
 	},

+ 0 - 3
drivers/dma/sh/rcar-hpbdma.c

@@ -619,7 +619,6 @@ error:
 
 static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
 {
-	struct dma_device *dma_dev = &hpbdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -628,7 +627,6 @@ static void hpb_dmae_chan_remove(struct hpb_dmae_device *hpbdev)
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static int hpb_dmae_remove(struct platform_device *pdev)
@@ -655,7 +653,6 @@ static struct platform_driver hpb_dmae_driver = {
 	.remove		= hpb_dmae_remove,
 	.shutdown	= hpb_dmae_shutdown,
 	.driver = {
-		.owner	= THIS_MODULE,
 		.name	= "hpb-dma-engine",
 	},
 };

+ 3 - 1
drivers/dma/sh/shdma-base.c

@@ -391,6 +391,8 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
 				pm_runtime_put(schan->dev);
 				schan->pm_state = SHDMA_PM_ESTABLISHED;
+			} else if (schan->pm_state == SHDMA_PM_PENDING) {
+				shdma_chan_xfer_ld_queue(schan);
 			}
 		}
 	}
@@ -951,7 +953,7 @@ void shdma_chan_probe(struct shdma_dev *sdev,
 	/* Add the channel to DMA device channel list */
 	list_add_tail(&schan->dma_chan.device_node,
 			&sdev->dma_dev.channels);
-	sdev->schan[sdev->dma_dev.chancnt++] = schan;
+	sdev->schan[id] = schan;
 }
 EXPORT_SYMBOL(shdma_chan_probe);
 

+ 0 - 1
drivers/dma/sh/shdma-of.c

@@ -66,7 +66,6 @@ MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
 
 static struct platform_driver shdma_of = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= "shdma-of",
 		.of_match_table = shdma_of_match,
 	},

+ 0 - 2
drivers/dma/sh/shdmac.c

@@ -572,7 +572,6 @@ err_no_irq:
 
 static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 {
-	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -581,7 +580,6 @@ static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static void sh_dmae_shutdown(struct platform_device *pdev)

+ 0 - 3
drivers/dma/sh/sudmac.c

@@ -295,7 +295,6 @@ err_no_irq:
 
 static void sudmac_chan_remove(struct sudmac_device *su_dev)
 {
-	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
 	struct shdma_chan *schan;
 	int i;
 
@@ -304,7 +303,6 @@ static void sudmac_chan_remove(struct sudmac_device *su_dev)
 
 		shdma_chan_remove(schan);
 	}
-	dma_dev->chancnt = 0;
 }
 
 static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan)
@@ -411,7 +409,6 @@ static int sudmac_remove(struct platform_device *pdev)
 
 static struct platform_driver sudmac_driver = {
 	.driver		= {
-		.owner	= THIS_MODULE,
 		.name	= SUDMAC_DRV_NAME,
 	},
 	.probe		= sudmac_probe,

+ 3 - 2
drivers/dma/sirf-dma.c

@@ -735,7 +735,6 @@ static int sirfsoc_dma_probe(struct platform_device *op)
 
 	dma = &sdma->dma;
 	dma->dev = dev;
-	dma->chancnt = SIRFSOC_DMA_CHANNELS;
 
 	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
 	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
@@ -752,7 +751,7 @@ static int sirfsoc_dma_probe(struct platform_device *op)
 	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
 	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
 
-	for (i = 0; i < dma->chancnt; i++) {
+	for (i = 0; i < SIRFSOC_DMA_CHANNELS; i++) {
 		schan = &sdma->channels[i];
 
 		schan->chan.device = dma;
@@ -835,6 +834,7 @@ static int sirfsoc_dma_runtime_resume(struct device *dev)
 	return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int sirfsoc_dma_pm_suspend(struct device *dev)
 {
 	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
@@ -916,6 +916,7 @@ static int sirfsoc_dma_pm_resume(struct device *dev)
 
 	return 0;
 }
+#endif
 
 static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
 	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
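The new CONFIG_PM_SLEEP guard silences "defined but unused" warnings: SET_SYSTEM_SLEEP_PM_OPS() only references its arguments when CONFIG_PM_SLEEP is enabled, so the suspend/resume callbacks it names must be compiled under the same condition. A sketch of the shape, with placeholder callback names and assuming the ops table also wires the sleep hooks:

#include <linux/device.h>
#include <linux/pm.h>

static int example_rt_suspend(struct device *dev) { return 0; }
static int example_rt_resume(struct device *dev) { return 0; }

#ifdef CONFIG_PM_SLEEP
static int example_pm_suspend(struct device *dev) { return 0; }
static int example_pm_resume(struct device *dev) { return 0; }
#endif

static const struct dev_pm_ops example_pm_ops = {
	SET_RUNTIME_PM_OPS(example_rt_suspend, example_rt_resume, NULL)
	/* expands to nothing without CONFIG_PM_SLEEP, hence the #ifdef */
	SET_SYSTEM_SLEEP_PM_OPS(example_pm_suspend, example_pm_resume)
};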

+ 1 - 0
drivers/dma/ste_dma40.c

@@ -3432,6 +3432,7 @@ static int __init d40_lcla_allocate(struct d40_base *base)
 
 			d40_err(base->dev, "Failed to allocate %d pages.\n",
 				base->lcla_pool.pages);
+			ret = -ENOMEM;
 
 			for (j = 0; j < i; j++)
 				free_pages(page_list[j], base->lcla_pool.pages);

+ 87 - 35
drivers/dma/sun6i-dma.c

@@ -18,6 +18,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of_dma.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -25,24 +26,6 @@
 
 #include "virt-dma.h"
 
-/*
- * There's 16 physical channels that can work in parallel.
- *
- * However we have 30 different endpoints for our requests.
- *
- * Since the channels are able to handle only an unidirectional
- * transfer, we need to allocate more virtual channels so that
- * everyone can grab one channel.
- *
- * Some devices can't work in both direction (mostly because it
- * wouldn't make sense), so we have a bit fewer virtual channels than
- * 2 channels per endpoints.
- */
-
-#define NR_MAX_CHANNELS		16
-#define NR_MAX_REQUESTS		30
-#define NR_MAX_VCHANS		53
-
 /*
  * Common registers
  */
@@ -59,6 +42,12 @@
 
 #define DMA_STAT		0x30
 
+/*
+ * sun8i specific registers
+ */
+#define SUN8I_DMA_GATE		0x20
+#define SUN8I_DMA_GATE_ENABLE	0x4
+
 /*
  * Channels specific registers
  */
@@ -101,6 +90,19 @@
 #define NORMAL_WAIT	8
 #define DRQ_SDRAM	1
 
+/*
+ * Hardware channels / ports representation
+ *
+ * The hardware is used in several SoCs, with differing numbers
+ * of channels and endpoints. This structure ties those numbers
+ * to a certain compatible string.
+ */
+struct sun6i_dma_config {
+	u32 nr_max_channels;
+	u32 nr_max_requests;
+	u32 nr_max_vchans;
+};
+
 /*
  * Hardware representation of the LLI
  *
@@ -159,6 +161,7 @@ struct sun6i_dma_dev {
 	struct dma_pool		*pool;
 	struct sun6i_pchan	*pchans;
 	struct sun6i_vchan	*vchans;
+	const struct sun6i_dma_config *cfg;
 };
 
 static struct device *chan2dev(struct dma_chan *chan)
@@ -426,6 +429,7 @@ static int sun6i_dma_start_desc(struct sun6i_vchan *vchan)
 static void sun6i_dma_tasklet(unsigned long data)
 {
 	struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data;
+	const struct sun6i_dma_config *cfg = sdev->cfg;
 	struct sun6i_vchan *vchan;
 	struct sun6i_pchan *pchan;
 	unsigned int pchan_alloc = 0;
@@ -453,7 +457,7 @@ static void sun6i_dma_tasklet(unsigned long data)
 	}
 
 	spin_lock_irq(&sdev->lock);
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		pchan = &sdev->pchans[pchan_idx];
 
 		if (pchan->vchan || list_empty(&sdev->pending))
@@ -474,7 +478,7 @@ static void sun6i_dma_tasklet(unsigned long data)
 	}
 	spin_unlock_irq(&sdev->lock);
 
-	for (pchan_idx = 0; pchan_idx < NR_MAX_CHANNELS; pchan_idx++) {
+	for (pchan_idx = 0; pchan_idx < cfg->nr_max_channels; pchan_idx++) {
 		if (!(pchan_alloc & BIT(pchan_idx)))
 			continue;
 
@@ -496,7 +500,7 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
 	int i, j, ret = IRQ_NONE;
 	u32 status;
 
-	for (i = 0; i < 2; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_channels / DMA_IRQ_CHAN_NR; i++) {
 		status = readl(sdev->base + DMA_IRQ_STAT(i));
 		if (!status)
 			continue;
@@ -506,7 +510,7 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
 
 		writel(status, sdev->base + DMA_IRQ_STAT(i));
 
-		for (j = 0; (j < 8) && status; j++) {
+		for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
 			if (status & DMA_IRQ_QUEUE) {
 				pchan = sdev->pchans + j;
 				vchan = pchan->vchan;
@@ -519,7 +523,7 @@ static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
 				}
 			}
 
-			status = status >> 4;
+			status = status >> DMA_IRQ_CHAN_WIDTH;
 		}
 
 		if (!atomic_read(&sdev->tasklet_shutdown))
@@ -815,7 +819,7 @@ static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec,
 	struct dma_chan *chan;
 	u8 port = dma_spec->args[0];
 
-	if (port > NR_MAX_REQUESTS)
+	if (port > sdev->cfg->nr_max_requests)
 		return NULL;
 
 	chan = dma_get_any_slave_channel(&sdev->slave);
@@ -848,7 +852,7 @@ static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev)
 {
 	int i;
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdev->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdev->vchans[i];
 
 		list_del(&vchan->vc.chan.device_node);
@@ -856,8 +860,48 @@ static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev)
 	}
 }
 
+/*
+ * For A31:
+ *
+ * There are 16 physical channels that can work in parallel.
+ *
+ * However, we have 30 different endpoints for our requests.
+ *
+ * Since each channel can only handle a unidirectional transfer, we
+ * need to allocate more virtual channels so that everyone can grab
+ * one.
+ *
+ * Some devices can't work in both directions (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels than
+ * 2 channels per endpoint.
+ */
+
+static struct sun6i_dma_config sun6i_a31_dma_cfg = {
+	.nr_max_channels = 16,
+	.nr_max_requests = 30,
+	.nr_max_vchans   = 53,
+};
+
+/*
+ * The A23 only has 8 physical channels, a maximum DRQ port id of 24,
+ * and a total of 37 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_a23_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 24,
+	.nr_max_vchans   = 37,
+};
+
+static struct of_device_id sun6i_dma_match[] = {
+	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
+	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+	{ /* sentinel */ }
+};
+
 static int sun6i_dma_probe(struct platform_device *pdev)
 {
+	const struct of_device_id *device;
 	struct sun6i_dma_dev *sdc;
 	struct resource *res;
 	int ret, i;
@@ -866,6 +910,11 @@ static int sun6i_dma_probe(struct platform_device *pdev)
 	if (!sdc)
 		return -ENOMEM;
 
+	device = of_match_device(sun6i_dma_match, &pdev->dev);
+	if (!device)
+		return -ENODEV;
+	sdc->cfg = device->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	sdc->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(sdc->base))
@@ -912,31 +961,30 @@ static int sun6i_dma_probe(struct platform_device *pdev)
 	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
 	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
 	sdc->slave.device_control		= sun6i_dma_control;
-	sdc->slave.chancnt			= NR_MAX_VCHANS;
 	sdc->slave.copy_align			= 4;
 
 	sdc->slave.dev = &pdev->dev;
 
-	sdc->pchans = devm_kcalloc(&pdev->dev, NR_MAX_CHANNELS,
+	sdc->pchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_channels,
 				   sizeof(struct sun6i_pchan), GFP_KERNEL);
 	if (!sdc->pchans)
 		return -ENOMEM;
 
-	sdc->vchans = devm_kcalloc(&pdev->dev, NR_MAX_VCHANS,
+	sdc->vchans = devm_kcalloc(&pdev->dev, sdc->cfg->nr_max_vchans,
 				   sizeof(struct sun6i_vchan), GFP_KERNEL);
 	if (!sdc->vchans)
 		return -ENOMEM;
 
 	tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc);
 
-	for (i = 0; i < NR_MAX_CHANNELS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_channels; i++) {
 		struct sun6i_pchan *pchan = &sdc->pchans[i];
 
 		pchan->idx = i;
 		pchan->base = sdc->base + 0x100 + i * 0x40;
 	}
 
-	for (i = 0; i < NR_MAX_VCHANS; i++) {
+	for (i = 0; i < sdc->cfg->nr_max_vchans; i++) {
 		struct sun6i_vchan *vchan = &sdc->vchans[i];
 
 		INIT_LIST_HEAD(&vchan->node);
@@ -976,6 +1024,15 @@ static int sun6i_dma_probe(struct platform_device *pdev)
 		goto err_dma_unregister;
 	}
 
+	/*
+	 * sun8i variant requires us to toggle a dma gating register,
+	 * as seen in Allwinner's SDK. This register is not documented
+	 * in the A23 user manual.
+	 */
+	if (of_device_is_compatible(pdev->dev.of_node,
+				    "allwinner,sun8i-a23-dma"))
+		writel(SUN8I_DMA_GATE_ENABLE, sdc->base + SUN8I_DMA_GATE);
+
 	return 0;
 
 err_dma_unregister:
@@ -1008,11 +1065,6 @@ static int sun6i_dma_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct of_device_id sun6i_dma_match[] = {
-	{ .compatible = "allwinner,sun6i-a31-dma" },
-	{ /* sentinel */ }
-};
-
 static struct platform_driver sun6i_dma_driver = {
 	.probe		= sun6i_dma_probe,
 	.remove		= sun6i_dma_remove,

+ 0 - 1
drivers/dma/tegra20-apb-dma.c

@@ -1597,7 +1597,6 @@ static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
 static struct platform_driver tegra_dmac_driver = {
 	.driver = {
 		.name	= "tegra-apbdma",
-		.owner = THIS_MODULE,
 		.pm	= &tegra_dma_dev_pm_ops,
 		.of_match_table = tegra_dma_of_match,
 	},

+ 0 - 1
drivers/dma/timb_dma.c

@@ -783,7 +783,6 @@ static int td_remove(struct platform_device *pdev)
 static struct platform_driver td_driver = {
 	.driver = {
 		.name	= DRIVER_NAME,
-		.owner  = THIS_MODULE,
 	},
 	.probe	= td_probe,
 	.remove	= td_remove,

+ 9 - 4
drivers/dma/xilinx/xilinx_vdma.c

@@ -942,6 +942,9 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
 	if (!xt->numf || !xt->sgl[0].size)
 		return NULL;
 
+	if (xt->frame_size != 1)
+		return NULL;
+
 	/* Allocate a transaction descriptor. */
 	desc = xilinx_vdma_alloc_tx_descriptor(chan);
 	if (!desc)
@@ -960,7 +963,7 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
 	hw = &segment->hw;
 	hw->vsize = xt->numf;
 	hw->hsize = xt->sgl[0].size;
-	hw->stride = xt->sgl[0].icg <<
+	hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
 			XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
 	hw->stride |= chan->config.frm_dly <<
 			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
@@ -971,9 +974,11 @@ xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
 		hw->buf_addr = xt->src_start;
 
 	/* Link the previous next descriptor to current */
-	prev = list_last_entry(&desc->segments,
-				struct xilinx_vdma_tx_segment, node);
-	prev->hw.next_desc = segment->phys;
+	if (!list_empty(&desc->segments)) {
+		prev = list_last_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		prev->hw.next_desc = segment->phys;
+	}
 
 	/* Insert the segment into the descriptor segments list. */
 	list_add_tail(&segment->node, &desc->segments);
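The stride fix follows from the dma_interleaved_template semantics: icg is the gap between the end of one chunk and the start of the next, while the VDMA stride register wants the start-to-start distance, i.e. size + icg. A hypothetical helper making the arithmetic explicit:

#include <linux/dmaengine.h>

/* start address of line n of a frame, given a constant
 * start-to-start stride of (size + icg) bytes */
static dma_addr_t frame_line_addr(dma_addr_t start,
				  struct dma_interleaved_template *xt,
				  unsigned int n)
{
	return start + n * (xt->sgl[0].size + xt->sgl[0].icg);
}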

+ 25 - 0
include/dt-bindings/dma/at91.h

@@ -9,6 +9,8 @@
 #ifndef __DT_BINDINGS_AT91_DMA_H__
 #define __DT_BINDINGS_AT91_DMA_H__
 
+/* ---------- HDMAC ---------- */
+
 /*
  * Source and/or destination peripheral ID
  */
@@ -24,4 +26,27 @@
 #define AT91_DMA_CFG_FIFOCFG_ALAP	(0x1 << AT91_DMA_CFG_FIFOCFG_OFFSET)	/* largest defined AHB burst */
 #define AT91_DMA_CFG_FIFOCFG_ASAP	(0x2 << AT91_DMA_CFG_FIFOCFG_OFFSET)	/* single AHB access */
 
+
+/* ---------- XDMAC ---------- */
+#define AT91_XDMAC_DT_MEM_IF_MASK	(0x1)
+#define AT91_XDMAC_DT_MEM_IF_OFFSET	(13)
+#define AT91_XDMAC_DT_MEM_IF(mem_if)	(((mem_if) & AT91_XDMAC_DT_MEM_IF_MASK) \
+					<< AT91_XDMAC_DT_MEM_IF_OFFSET)
+#define AT91_XDMAC_DT_GET_MEM_IF(cfg)	(((cfg) >> AT91_XDMAC_DT_MEM_IF_OFFSET) \
+					& AT91_XDMAC_DT_MEM_IF_MASK)
+
+#define AT91_XDMAC_DT_PER_IF_MASK	(0x1)
+#define AT91_XDMAC_DT_PER_IF_OFFSET	(14)
+#define AT91_XDMAC_DT_PER_IF(per_if)	(((per_if) & AT91_XDMAC_DT_PER_IF_MASK) \
+					<< AT91_XDMAC_DT_PER_IF_OFFSET)
+#define AT91_XDMAC_DT_GET_PER_IF(cfg)	(((cfg) >> AT91_XDMAC_DT_PER_IF_OFFSET) \
+					& AT91_XDMAC_DT_PER_IF_MASK)
+
+#define AT91_XDMAC_DT_PERID_MASK	(0x7f)
+#define AT91_XDMAC_DT_PERID_OFFSET	(24)
+#define AT91_XDMAC_DT_PERID(perid)	(((perid) & AT91_XDMAC_DT_PERID_MASK) \
+					<< AT91_XDMAC_DT_PERID_OFFSET)
+#define AT91_XDMAC_DT_GET_PERID(cfg)	(((cfg) >> AT91_XDMAC_DT_PERID_OFFSET) \
+					& AT91_XDMAC_DT_PERID_MASK)
+
 #endif /* __DT_BINDINGS_AT91_DMA_H__ */
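The new XDMAC macros pack a memory interface, a peripheral interface and a 7-bit peripheral ID into a single devicetree cell; the _GET_ variants unpack it on the driver side. A round-trip sketch (the interface numbers and PERID below are made up for illustration):

#include <linux/bug.h>
#include <linux/types.h>
#include <dt-bindings/dma/at91.h>

static void at91_xdmac_cell_roundtrip(void)
{
	u32 cfg = AT91_XDMAC_DT_MEM_IF(0)	/* memory on AHB interface 0 */
		| AT91_XDMAC_DT_PER_IF(1)	/* peripheral on AHB interface 1 */
		| AT91_XDMAC_DT_PERID(25);	/* request line 25 */

	WARN_ON(AT91_XDMAC_DT_GET_MEM_IF(cfg) != 0);
	WARN_ON(AT91_XDMAC_DT_GET_PER_IF(cfg) != 1);
	WARN_ON(AT91_XDMAC_DT_GET_PERID(cfg) != 25);
}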

+ 2 - 1
include/linux/dmaengine.h

@@ -447,7 +447,8 @@ struct dmaengine_unmap_data {
  * 	communicate status
  * @phys: physical address of the descriptor
  * @chan: target channel for this operation
- * @tx_submit: set the prepared descriptor(s) to be executed by the engine
+ * @tx_submit: accept the descriptor, assign an ordered cookie and mark
+ * the descriptor as pending; it is pushed to the hardware on a later
+ * .issue_pending() call
  * @callback: routine to call after this operation is complete
  * @callback_param: general parameter to pass to the callback routine
  * ---async_tx api specific fields---
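The reworded kerneldoc pins down the dmaengine contract: tx_submit() only queues the descriptor, and nothing reaches the hardware until .issue_pending(). A minimal client-side sketch of that two-step flow (channel lookup and DMA mapping assumed done elsewhere):

#include <linux/dmaengine.h>

static int example_start_tx(struct dma_chan *chan, dma_addr_t buf, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	tx = dmaengine_prep_slave_single(chan, buf, len,
					 DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	cookie = dmaengine_submit(tx);	/* cookie assigned, marked pending */
	if (dma_submit_error(cookie))
		return -EIO;

	dma_async_issue_pending(chan);	/* now the engine may start it */
	return 0;
}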

+ 1 - 0
include/linux/platform_data/dma-imx.h

@@ -41,6 +41,7 @@ enum sdma_peripheral_type {
 	IMX_DMATYPE_ESAI,	/* ESAI */
 	IMX_DMATYPE_SSI_DUAL,	/* SSI Dual FIFO */
 	IMX_DMATYPE_ASRC_SP,	/* Shared ASRC */
+	IMX_DMATYPE_SAI,	/* SAI */
 };
 
 enum imx_dma_prio {