Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging-2.6

Larry Finger 14 years ago
commit c25bdbe288
98 changed files with 3685 additions and 2800 deletions
  1. 6 0
      Documentation/ABI/testing/pstore
  2. 164 70
      Documentation/dmaengine.txt
  3. 2 0
      Documentation/kernel-parameters.txt
  4. 4 0
      arch/ia64/Kconfig
  5. 55 0
      arch/ia64/include/asm/gpio.h
  6. 15 5
      drivers/acpi/apei/erst.c
  7. 1 1
      drivers/base/devtmpfs.c
  8. 0 1
      drivers/dma/TODO
  9. 124 122
      drivers/dma/amba-pl08x.c
  10. 2 2
      drivers/dma/at_hdmac.c
  11. 12 7
      drivers/dma/coh901318.c
  12. 2 2
      drivers/dma/dmaengine.c
  13. 1 1
      drivers/dma/ep93xx_dma.c
  14. 3 1
      drivers/dma/imx-sdma.c
  15. 0 2
      drivers/dma/intel_mid_dma.c
  16. 2 4
      drivers/dma/ipu/ipu_idmac.c
  17. 2 1
      drivers/dma/mv_xor.c
  18. 9 4
      drivers/dma/mxs-dma.c
  19. 94 33
      drivers/dma/pch_dma.c
  20. 36 28
      drivers/dma/pl330.c
  21. 155 115
      drivers/dma/ste_dma40.c
  22. 0 3
      drivers/dma/ste_dma40_ll.h
  23. 231 12
      drivers/firmware/efivars.c
  24. 110 80
      drivers/regulator/core.c
  25. 26 6
      drivers/regulator/dummy.c
  26. 51 12
      drivers/regulator/tps65910-regulator.c
  27. 31 35
      drivers/regulator/twl-regulator.c
  28. 65 61
      drivers/regulator/wm831x-dcdc.c
  29. 22 3
      drivers/regulator/wm831x-ldo.c
  30. 2 2
      drivers/regulator/wm8994-regulator.c
  31. 3 3
      fs/9p/acl.c
  32. 2 2
      fs/9p/acl.h
  33. 3 3
      fs/9p/vfs_inode_dotl.c
  34. 1 0
      fs/block_dev.c
  35. 2 8
      fs/btrfs/acl.c
  36. 45 2
      fs/btrfs/inode.c
  37. 27 42
      fs/dcache.c
  38. 2 6
      fs/ext2/acl.c
  39. 2 7
      fs/ext3/acl.c
  40. 1 1
      fs/ext4/Makefile
  41. 2 7
      fs/ext4/acl.c
  42. 48 0
      fs/ext4/balloc.c
  43. 21 0
      fs/ext4/block_validity.c
  44. 52 3
      fs/ext4/ext4.h
  45. 59 70
      fs/ext4/extents.c
  46. 21 5
      fs/ext4/fsync.c
  47. 1 1
      fs/ext4/ialloc.c
  48. 1482 0
      fs/ext4/indirect.c
  49. 37 1559
      fs/ext4/inode.c
  50. 8 4
      fs/ext4/ioctl.c
  51. 146 84
      fs/ext4/mballoc.c
  52. 0 1
      fs/ext4/mballoc.h
  53. 7 14
      fs/ext4/namei.c
  54. 1 5
      fs/ext4/page-io.c
  55. 103 96
      fs/ext4/resize.c
  56. 58 30
      fs/ext4/super.c
  57. 43 0
      fs/ext4/truncate.h
  58. 4 9
      fs/generic_acl.c
  59. 3 3
      fs/gfs2/acl.c
  60. 1 0
      fs/hppfs/hppfs.c
  61. 8 5
      fs/inode.c
  62. 4 1
      fs/jbd2/checkpoint.c
  63. 0 67
      fs/jbd2/journal.c
  64. 2 2
      fs/jffs2/acl.c
  65. 1 1
      fs/jffs2/acl.h
  66. 1 1
      fs/jffs2/fs.c
  67. 1 1
      fs/jffs2/os-linux.h
  68. 1 3
      fs/jfs/acl.c
  69. 1 3
      fs/jfs/xattr.c
  70. 15 9
      fs/namei.c
  71. 1 1
      fs/nfs/nfs3acl.c
  72. 3 3
      fs/nfs/nfs3proc.c
  73. 2 2
      fs/ocfs2/acl.c
  74. 8 8
      fs/posix_acl.c
  75. 7 5
      fs/pstore/inode.c
  76. 1 1
      fs/pstore/internal.h
  77. 22 8
      fs/pstore/platform.c
  78. 2 8
      fs/reiserfs/xattr_acl.c
  79. 3 3
      fs/xfs/linux-2.6/xfs_acl.c
  80. 6 3
      include/linux/amba/pl08x.h
  81. 6 0
      include/linux/efi.h
  82. 8 1
      include/linux/fs.h
  83. 0 6
      include/linux/jbd2.h
  84. 2 2
      include/linux/nfs_fs.h
  85. 4 4
      include/linux/posix_acl.h
  86. 6 3
      include/linux/pstore.h
  87. 3 0
      include/linux/regulator/consumer.h
  88. 1 3
      include/linux/regulator/driver.h
  89. 79 8
      include/trace/events/ext4.h
  90. 19 17
      include/trace/events/jbd2.h
  91. 15 7
      kernel/debug/gdbstub.c
  92. 2 3
      kernel/debug/kdb/kdb_bt.c
  93. 0 4
      kernel/debug/kdb/kdb_cmds
  94. 8 13
      kernel/debug/kdb/kdb_debugger.c
  95. 25 11
      kernel/debug/kdb/kdb_io.c
  96. 2 2
      kernel/debug/kdb/kdb_main.c
  97. 1 2
      kernel/debug/kdb/kdb_private.h
  98. 3 1
      mm/oom_kill.c

+ 6 - 0
Documentation/ABI/testing/pstore

@@ -39,3 +39,9 @@ Description:	Generic interface to platform dependent persistent storage.
 		multiple) files based on the record size of the underlying
 		persistent storage until at least this amount is reached.
 		Default is 10 Kbytes.
+
+		Pstore only supports one backend at a time. If multiple
+		backends are available, the preferred backend may be
+		set by passing the pstore.backend= argument to the kernel at
+		boot time.
+

+ 164 - 70
Documentation/dmaengine.txt

@@ -10,87 +10,181 @@ NOTE: For DMA Engine usage in async_tx please see:
 Below is a guide to device driver writers on how to use the Slave-DMA API of the
 DMA Engine. This is applicable only for slave DMA usage only.
 
-The slave DMA usage consists of following steps
+The slave DMA usage consists of following steps:
 1. Allocate a DMA slave channel
 2. Set slave and controller specific parameters
 3. Get a descriptor for transaction
-4. Submit the transaction and wait for callback notification
+4. Submit the transaction
+5. Issue pending requests and wait for callback notification
 
 1. Allocate a DMA slave channel
-Channel allocation is slightly different in the slave DMA context, client
-drivers typically need a channel from a particular DMA controller only and even
-in some cases a specific channel is desired. To request a channel
-dma_request_channel() API is used.
-
-Interface:
-struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
-		dma_filter_fn filter_fn,
-		void *filter_param);
-where dma_filter_fn is defined as:
-typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
-
-When the optional 'filter_fn' parameter is set to NULL dma_request_channel
-simply returns the first channel that satisfies the capability mask.  Otherwise,
-when the mask parameter is insufficient for specifying the necessary channel,
-the filter_fn routine can be used to disposition the available channels in the
-system. The filter_fn routine is called once for each free channel in the
-system.  Upon seeing a suitable channel filter_fn returns DMA_ACK which flags
-that channel to be the return value from dma_request_channel.  A channel
-allocated via this interface is exclusive to the caller, until
-dma_release_channel() is called.
+
+   Channel allocation is slightly different in the slave DMA context,
+   client drivers typically need a channel from a particular DMA
+   controller only and even in some cases a specific channel is desired.
+   To request a channel dma_request_channel() API is used.
+
+   Interface:
+	struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+			dma_filter_fn filter_fn,
+			void *filter_param);
+   where dma_filter_fn is defined as:
+	typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+   The 'filter_fn' parameter is optional, but highly recommended for
+   slave and cyclic channels as they typically need to obtain a specific
+   DMA channel.
+
+   When the optional 'filter_fn' parameter is NULL, dma_request_channel()
+   simply returns the first channel that satisfies the capability mask.
+
+   Otherwise, the 'filter_fn' routine will be called once for each free
+   channel which has a capability in 'mask'.  'filter_fn' is expected to
+   return 'true' when the desired DMA channel is found.
+
+   A channel allocated via this interface is exclusive to the caller,
+   until dma_release_channel() is called.
 
 2. Set slave and controller specific parameters
-Next step is always to pass some specific information to the DMA driver. Most of
-the generic information which a slave DMA can use is in struct dma_slave_config.
-It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA
-burst lengths etc. If some DMA controllers have more parameters to be sent then
-they should try to embed struct dma_slave_config in their controller specific
-structure. That gives flexibility to client to pass more parameters, if
-required.
-
-Interface:
-int dmaengine_slave_config(struct dma_chan *chan,
-					  struct dma_slave_config *config)
+
+   Next step is always to pass some specific information to the DMA
+   driver.  Most of the generic information which a slave DMA can use
+   is in struct dma_slave_config.  This allows the clients to specify
+   DMA direction, DMA addresses, bus widths, DMA burst lengths etc
+   for the peripheral.
+
+   If some DMA controllers have more parameters to be sent then they
+   should try to embed struct dma_slave_config in their controller
+   specific structure. That gives flexibility to client to pass more
+   parameters, if required.
+
+   Interface:
+	int dmaengine_slave_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+
+   Please see the dma_slave_config structure definition in dmaengine.h
+   for a detailed explaination of the struct members.  Please note
+   that the 'direction' member will be going away as it duplicates the
+   direction given in the prepare call.
 
 3. Get a descriptor for transaction
-For slave usage the various modes of slave transfers supported by the
-DMA-engine are:
-slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
-dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
+
+   For slave usage the various modes of slave transfers supported by the
+   DMA-engine are:
+
+   slave_sg	- DMA a list of scatter gather buffers from/to a peripheral
+   dma_cyclic	- Perform a cyclic DMA operation from/to a peripheral till the
 		  operation is explicitly stopped.
-The non NULL return of this transfer API represents a "descriptor" for the given
-transaction.
-
-Interface:
-struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)(
-		struct dma_chan *chan,
-		struct scatterlist *dst_sg, unsigned int dst_nents,
-		struct scatterlist *src_sg, unsigned int src_nents,
+
+   A non-NULL return of this transfer API represents a "descriptor" for
+   the given transaction.
+
+   Interface:
+	struct dma_async_tx_descriptor *(*chan->device->device_prep_slave_sg)(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
 		unsigned long flags);
-struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
+
+	struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_data_direction direction);
 
-4. Submit the transaction and wait for callback notification
-To schedule the transaction to be scheduled by dma device, the "descriptor"
-returned in above (3) needs to be submitted.
-To tell the dma driver that a transaction is ready to be serviced, the
-descriptor->submit() callback needs to be invoked. This chains the descriptor to
-the pending queue.
-The transactions in the pending queue can be activated by calling the
-issue_pending API. If channel is idle then the first transaction in queue is
-started and subsequent ones queued up.
-On completion of the DMA operation the next in queue is submitted and a tasklet
-triggered. The tasklet would then call the client driver completion callback
-routine for notification, if set.
-Interface:
-void dma_async_issue_pending(struct dma_chan *chan);
-
-==============================================================================
-
-Additional usage notes for dma driver writers
-1/ Although DMA engine specifies that completion callback routines cannot submit
-any new operations, but typically for slave DMA subsequent transaction may not
-be available for submit prior to callback routine being called. This requirement
-is not a requirement for DMA-slave devices. But they should take care to drop
-the spin-lock they might be holding before calling the callback routine
+   The peripheral driver is expected to have mapped the scatterlist for
+   the DMA operation prior to calling device_prep_slave_sg, and must
+   keep the scatterlist mapped until the DMA operation has completed.
+   The scatterlist must be mapped using the DMA struct device.  So,
+   normal setup should look like this:
+
+	nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len);
+	if (nr_sg == 0)
+		/* error */
+
+	desc = chan->device->device_prep_slave_sg(chan, sgl, nr_sg,
+			direction, flags);
+
+   Once a descriptor has been obtained, the callback information can be
+   added and the descriptor must then be submitted.  Some DMA engine
+   drivers may hold a spinlock between a successful preparation and
+   submission so it is important that these two operations are closely
+   paired.
+
+   Note:
+	Although the async_tx API specifies that completion callback
+	routines cannot submit any new operations, this is not the
+	case for slave/cyclic DMA.
+
+	For slave DMA, the subsequent transaction may not be available
+	for submission prior to callback function being invoked, so
+	slave DMA callbacks are permitted to prepare and submit a new
+	transaction.
+
+	For cyclic DMA, a callback function may wish to terminate the
+	DMA via dmaengine_terminate_all().
+
+	Therefore, it is important that DMA engine drivers drop any
+	locks before calling the callback function which may cause a
+	deadlock.
+
+	Note that callbacks will always be invoked from the DMA
+	engines tasklet, never from interrupt context.
+
+4. Submit the transaction
+
+   Once the descriptor has been prepared and the callback information
+   added, it must be placed on the DMA engine drivers pending queue.
+
+   Interface:
+	dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
+
+   This returns a cookie can be used to check the progress of DMA engine
+   activity via other DMA engine calls not covered in this document.
+
+   dmaengine_submit() will not start the DMA operation, it merely adds
+   it to the pending queue.  For this, see step 5, dma_async_issue_pending.
+
+5. Issue pending DMA requests and wait for callback notification
+
+   The transactions in the pending queue can be activated by calling the
+   issue_pending API. If channel is idle then the first transaction in
+   queue is started and subsequent ones queued up.
+
+   On completion of each DMA operation, the next in queue is started and
+   a tasklet triggered. The tasklet will then call the client driver
+   completion callback routine for notification, if set.
+
+   Interface:
+	void dma_async_issue_pending(struct dma_chan *chan);
+
+Further APIs:
+
+1. int dmaengine_terminate_all(struct dma_chan *chan)
+
+   This causes all activity for the DMA channel to be stopped, and may
+   discard data in the DMA FIFO which hasn't been fully transferred.
+   No callback functions will be called for any incomplete transfers.
+
+2. int dmaengine_pause(struct dma_chan *chan)
+
+   This pauses activity on the DMA channel without data loss.
+
+3. int dmaengine_resume(struct dma_chan *chan)
+
+   Resume a previously paused DMA channel.  It is invalid to resume a
+   channel which is not currently paused.
+
+4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
+        dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
+
+   This can be used to check the status of the channel.  Please see
+   the documentation in include/linux/dmaengine.h for a more complete
+   description of this API.
+
+   This can be used in conjunction with dma_async_is_complete() and
+   the cookie returned from 'descriptor->submit()' to check for
+   completion of a specific DMA transaction.
+
+   Note:
+	Not all DMA engine drivers can return reliable information for
+	a running DMA channel.  It is recommended that DMA engine users
+	pause or stop (via dmaengine_terminate_all) the channel before
+	using this API.

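A minimal usage sketch, assembled only from the interfaces documented in the
patch above, may help tie steps 1-5 together. The filter callback my_filter,
its drv cookie, the completion handler my_done, fifo_phys, and the
already-dma_map_sg()-mapped sgl/nr_sg are illustrative assumptions, not part
of the commit:

	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct dma_slave_config cfg = {
		.direction	= DMA_TO_DEVICE,
		.dst_addr	= fifo_phys,	/* assumed device FIFO address */
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst	= 16,
	};
	struct dma_async_tx_descriptor *desc;
	dma_cookie_t cookie;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, my_filter, drv);	/* step 1 */
	if (!chan)
		return -ENODEV;
	dmaengine_slave_config(chan, &cfg);			/* step 2 */
	desc = chan->device->device_prep_slave_sg(chan, sgl,	/* step 3 */
			nr_sg, DMA_TO_DEVICE, DMA_PREP_INTERRUPT);
	desc->callback = my_done;	/* invoked from the engine's tasklet */
	cookie = dmaengine_submit(desc);			/* step 4 */
	dma_async_issue_pending(chan);				/* step 5 */
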
+ 2 - 0
Documentation/kernel-parameters.txt

@@ -2153,6 +2153,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			[HW,MOUSE] Controls Logitech smartscroll autorepeat.
 			0 = disabled, 1 = enabled (default).
 
+	pstore.backend=	Specify the name of the pstore backend to use
+
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 

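As a concrete illustration of the parameter documented above: with the ACPI
ERST pstore backend touched elsewhere in this merge, a boot line pinning it
(the backend name "erst" is assumed here) would read:

	vmlinuz root=/dev/sda1 ro pstore.backend=erst
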
+ 4 - 0
arch/ia64/Kconfig

@@ -27,6 +27,7 @@ config IA64
 	select GENERIC_PENDING_IRQ if SMP
 	select IRQ_PER_CPU
 	select GENERIC_IRQ_SHOW
+	select ARCH_WANT_OPTIONAL_GPIOLIB
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -89,6 +90,9 @@ config GENERIC_TIME_VSYSCALL
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
+config GENERIC_GPIO
+	def_bool y
+
 config DMI
 	bool
 	default y

+ 55 - 0
arch/ia64/include/asm/gpio.h

@@ -0,0 +1,55 @@
+/*
+ * Generic GPIO API implementation for IA-64.
+ *
+ * A stright copy of that for PowerPC which was:
+ *
+ * Copyright (c) 2007-2008  MontaVista Software, Inc.
+ *
+ * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_IA64_GPIO_H
+#define _ASM_IA64_GPIO_H
+
+#include <linux/errno.h>
+#include <asm-generic/gpio.h>
+
+#ifdef CONFIG_GPIOLIB
+
+/*
+ * We don't (yet) implement inlined/rapid versions for on-chip gpios.
+ * Just call gpiolib.
+ */
+static inline int gpio_get_value(unsigned int gpio)
+{
+	return __gpio_get_value(gpio);
+}
+
+static inline void gpio_set_value(unsigned int gpio, int value)
+{
+	__gpio_set_value(gpio, value);
+}
+
+static inline int gpio_cansleep(unsigned int gpio)
+{
+	return __gpio_cansleep(gpio);
+}
+
+static inline int gpio_to_irq(unsigned int gpio)
+{
+	return __gpio_to_irq(gpio);
+}
+
+static inline int irq_to_gpio(unsigned int irq)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_GPIOLIB */
+
+#endif /* _ASM_IA64_GPIO_H */

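A brief, hypothetical consumer of the wrappers in this new header;
gpio_request(), gpio_direction_output() and gpio_free() come from the gpiolib
core pulled in via <asm-generic/gpio.h>, and GPIO number 42 plus the label are
purely illustrative:

	int err = gpio_request(42, "demo");

	if (err)
		return err;
	err = gpio_direction_output(42, 0);	/* claim as output, drive low */
	if (!err)
		gpio_set_value(42, 1);		/* routed to __gpio_set_value() */
	gpio_free(42);
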
+ 15 - 5
drivers/acpi/apei/erst.c

@@ -932,8 +932,11 @@ static int erst_check_table(struct acpi_table_erst *erst_tab)
 static int erst_open_pstore(struct pstore_info *psi);
 static int erst_close_pstore(struct pstore_info *psi);
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-		       struct timespec *time);
-static u64 erst_writer(enum pstore_type_id type, size_t size);
+			   struct timespec *time, struct pstore_info *psi);
+static u64 erst_writer(enum pstore_type_id type, unsigned int part,
+		       size_t size, struct pstore_info *psi);
+static int erst_clearer(enum pstore_type_id type, u64 id,
+			struct pstore_info *psi);
 
 static struct pstore_info erst_info = {
 	.owner		= THIS_MODULE,
@@ -942,7 +945,7 @@ static struct pstore_info erst_info = {
 	.close		= erst_close_pstore,
 	.read		= erst_reader,
 	.write		= erst_writer,
-	.erase		= erst_clear
+	.erase		= erst_clearer
 };
 
 #define CPER_CREATOR_PSTORE						\
@@ -983,7 +986,7 @@ static int erst_close_pstore(struct pstore_info *psi)
 }
 
 static ssize_t erst_reader(u64 *id, enum pstore_type_id *type,
-		       struct timespec *time)
+			   struct timespec *time, struct pstore_info *psi)
 {
 	int rc;
 	ssize_t len = 0;
@@ -1037,7 +1040,8 @@ out:
 	return (rc < 0) ? rc : (len - sizeof(*rcd));
 }
 
-static u64 erst_writer(enum pstore_type_id type, size_t size)
+static u64 erst_writer(enum pstore_type_id type, unsigned int part,
+		       size_t size, struct pstore_info *psi)
 {
 	struct cper_pstore_record *rcd = (struct cper_pstore_record *)
 					(erst_info.buf - sizeof(*rcd));
@@ -1080,6 +1084,12 @@ static u64 erst_writer(enum pstore_type_id type, size_t size)
 	return rcd->hdr.record_id;
 }
 
+static int erst_clearer(enum pstore_type_id type, u64 id,
+			struct pstore_info *psi)
+{
+	return erst_clear(id);
+}
+
 static int __init erst_init(void)
 {
 	int rc = 0;

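The erst.c hunks above reflect the new pstore backend contract: the read,
write and erase callbacks now also receive the owning struct pstore_info. A
minimal backend declaration under the new signatures would look roughly like
the sketch below; my_read, my_write and my_erase are placeholder names, not
part of the commit:

	static ssize_t my_read(u64 *id, enum pstore_type_id *type,
			       struct timespec *time, struct pstore_info *psi);
	static u64 my_write(enum pstore_type_id type, unsigned int part,
			    size_t size, struct pstore_info *psi);
	static int my_erase(enum pstore_type_id type, u64 id,
			    struct pstore_info *psi);

	static struct pstore_info my_info = {
		.owner	= THIS_MODULE,
		.read	= my_read,
		.write	= my_write,
		.erase	= my_erase,
	};
	/* registered at init time via pstore_register(&my_info) */
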
+ 1 - 1
drivers/base/devtmpfs.c

@@ -166,7 +166,7 @@ static int create_path(const char *nodepath)
 {
 	char *path;
 	char *s;
-	int err;
+	int err = 0;
 
 	/* parent directories do not exist, create them */
 	path = kstrdup(nodepath, GFP_KERNEL);

+ 0 - 1
drivers/dma/TODO

@@ -9,6 +9,5 @@ TODO for slave dma
 	- mxs-dma.c
 	- dw_dmac
 	- intel_mid_dma
-	- ste_dma40
 4. Check other subsystems for dma drivers and merge/move to dmaengine
 5. Remove dma_slave_config's dma direction.

+ 124 - 122
drivers/dma/amba-pl08x.c

@@ -156,14 +156,10 @@ struct pl08x_driver_data {
 #define PL08X_BOUNDARY_SHIFT		(10)	/* 1KB 0x400 */
 #define PL08X_BOUNDARY_SIZE		(1 << PL08X_BOUNDARY_SHIFT)
 
-/* Minimum period between work queue runs */
-#define PL08X_WQ_PERIODMIN	20
-
 /* Size (bytes) of each LLI buffer allocated for one transfer */
 # define PL08X_LLI_TSFR_SIZE	0x2000
 
 /* Maximum times we call dma_pool_alloc on this pool without freeing */
-#define PL08X_MAX_ALLOCS	0x40
 #define MAX_NUM_TSFR_LLIS	(PL08X_LLI_TSFR_SIZE/sizeof(struct pl08x_lli))
 #define PL08X_ALIGN		8
 
@@ -495,10 +491,10 @@ static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
 
 struct pl08x_lli_build_data {
 	struct pl08x_txd *txd;
-	struct pl08x_driver_data *pl08x;
 	struct pl08x_bus_data srcbus;
 	struct pl08x_bus_data dstbus;
 	size_t remainder;
+	u32 lli_bus;
 };
 
 /*
@@ -551,8 +547,7 @@ static void pl08x_fill_lli_for_desc(struct pl08x_lli_build_data *bd,
 	llis_va[num_llis].src = bd->srcbus.addr;
 	llis_va[num_llis].dst = bd->dstbus.addr;
 	llis_va[num_llis].lli = llis_bus + (num_llis + 1) * sizeof(struct pl08x_lli);
-	if (bd->pl08x->lli_buses & PL08X_AHB2)
-		llis_va[num_llis].lli |= PL080_LLI_LM_AHB2;
+	llis_va[num_llis].lli |= bd->lli_bus;
 
 	if (cctl & PL080_CONTROL_SRC_INCR)
 		bd->srcbus.addr += len;
@@ -605,9 +600,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	cctl = txd->cctl;
 
 	bd.txd = txd;
-	bd.pl08x = pl08x;
 	bd.srcbus.addr = txd->src_addr;
 	bd.dstbus.addr = txd->dst_addr;
+	bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0;
 
 	/* Find maximum width of the source bus */
 	bd.srcbus.maxwidth =
@@ -622,25 +617,15 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	/* Set up the bus widths to the maximum */
 	bd.srcbus.buswidth = bd.srcbus.maxwidth;
 	bd.dstbus.buswidth = bd.dstbus.maxwidth;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s source bus is %d bytes wide, dest bus is %d bytes wide\n",
-		 __func__, bd.srcbus.buswidth, bd.dstbus.buswidth);
-
 
 	/*
 	 * Bytes transferred == tsize * MIN(buswidths), not max(buswidths)
 	 */
 	max_bytes_per_lli = min(bd.srcbus.buswidth, bd.dstbus.buswidth) *
 		PL080_CONTROL_TRANSFER_SIZE_MASK;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s max bytes per lli = %zu\n",
-		 __func__, max_bytes_per_lli);
 
 	/* We need to count this down to zero */
 	bd.remainder = txd->len;
-	dev_vdbg(&pl08x->adev->dev,
-		 "%s remainder = %zu\n",
-		 __func__, bd.remainder);
 
 	/*
 	 * Choose bus to align to
@@ -649,6 +634,16 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	 */
 	pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
 
+	dev_vdbg(&pl08x->adev->dev, "src=0x%08x%s/%u dst=0x%08x%s/%u len=%zu llimax=%zu\n",
+		 bd.srcbus.addr, cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+		 bd.srcbus.buswidth,
+		 bd.dstbus.addr, cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+		 bd.dstbus.buswidth,
+		 bd.remainder, max_bytes_per_lli);
+	dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
+		 mbus == &bd.srcbus ? "src" : "dst",
+		 sbus == &bd.srcbus ? "src" : "dst");
+
 	if (txd->len < mbus->buswidth) {
 		/* Less than a bus width available - send as single bytes */
 		while (bd.remainder) {
@@ -840,15 +835,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	{
 		int i;
 
+		dev_vdbg(&pl08x->adev->dev,
+			 "%-3s %-9s  %-10s %-10s %-10s %s\n",
+			 "lli", "", "csrc", "cdst", "clli", "cctl");
 		for (i = 0; i < num_llis; i++) {
 			dev_vdbg(&pl08x->adev->dev,
-				 "lli %d @%p: csrc=0x%08x, cdst=0x%08x, cctl=0x%08x, clli=0x%08x\n",
-				 i,
-				 &llis_va[i],
-				 llis_va[i].src,
-				 llis_va[i].dst,
-				 llis_va[i].cctl,
-				 llis_va[i].lli
+				 "%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				 i, &llis_va[i], llis_va[i].src,
+				 llis_va[i].dst, llis_va[i].lli, llis_va[i].cctl
 				);
 		}
 	}
@@ -1054,64 +1048,105 @@ pl08x_dma_tx_status(struct dma_chan *chan,
 
 /* PrimeCell DMA extension */
 struct burst_table {
-	int burstwords;
+	u32 burstwords;
 	u32 reg;
 };
 
 static const struct burst_table burst_sizes[] = {
 	{
 		.burstwords = 256,
-		.reg = (PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_256,
 	},
 	{
 		.burstwords = 128,
-		.reg = (PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_128,
 	},
 	{
 		.burstwords = 64,
-		.reg = (PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_64,
 	},
 	{
 		.burstwords = 32,
-		.reg = (PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_32,
 	},
 	{
 		.burstwords = 16,
-		.reg = (PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_16,
 	},
 	{
 		.burstwords = 8,
-		.reg = (PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_8,
 	},
 	{
 		.burstwords = 4,
-		.reg = (PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.reg = PL080_BSIZE_4,
 	},
 	{
-		.burstwords = 1,
-		.reg = (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT),
+		.burstwords = 0,
+		.reg = PL080_BSIZE_1,
 	},
 };
 
+/*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port.  We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
+ */
+static u32 pl08x_select_bus(u8 src, u8 dst)
+{
+	u32 cctl = 0;
+
+	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+		cctl |= PL080_CONTROL_DST_AHB2;
+	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+		cctl |= PL080_CONTROL_SRC_AHB2;
+
+	return cctl;
+}
+
+static u32 pl08x_cctl(u32 cctl)
+{
+	cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+		  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+		  PL080_CONTROL_PROT_MASK);
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	return cctl | PL080_CONTROL_PROT_SYS;
+}
+
+static u32 pl08x_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return PL080_WIDTH_8BIT;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return PL080_WIDTH_16BIT;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return PL080_WIDTH_32BIT;
+	default:
+		return ~0;
+	}
+}
+
+static u32 pl08x_burst(u32 maxburst)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
+		if (burst_sizes[i].burstwords <= maxburst)
+			break;
+
+	return burst_sizes[i].reg;
+}
+
 static int dma_set_runtime_config(struct dma_chan *chan,
 				  struct dma_slave_config *config)
 {
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
-	struct pl08x_channel_data *cd = plchan->cd;
 	enum dma_slave_buswidth addr_width;
-	dma_addr_t addr;
-	u32 maxburst;
+	u32 width, burst, maxburst;
 	u32 cctl = 0;
-	int i;
 
 	if (!plchan->slave)
 		return -EINVAL;
@@ -1119,11 +1154,9 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 	/* Transfer direction */
 	plchan->runtime_direction = config->direction;
 	if (config->direction == DMA_TO_DEVICE) {
-		addr = config->dst_addr;
 		addr_width = config->dst_addr_width;
 		maxburst = config->dst_maxburst;
 	} else if (config->direction == DMA_FROM_DEVICE) {
-		addr = config->src_addr;
 		addr_width = config->src_addr_width;
 		maxburst = config->src_maxburst;
 	} else {
@@ -1132,46 +1165,40 @@ static int dma_set_runtime_config(struct dma_chan *chan,
 		return -EINVAL;
 	}
 
-	switch (addr_width) {
-	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		cctl |= (PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		cctl |= (PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		cctl |= (PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT) |
-			(PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT);
-		break;
-	default:
+	width = pl08x_width(addr_width);
+	if (width == ~0) {
 		dev_err(&pl08x->adev->dev,
 			"bad runtime_config: alien address width\n");
 		return -EINVAL;
 	}
 
+	cctl |= width << PL080_CONTROL_SWIDTH_SHIFT;
+	cctl |= width << PL080_CONTROL_DWIDTH_SHIFT;
+
 	/*
-	 * Now decide on a maxburst:
 	 * If this channel will only request single transfers, set this
 	 * down to ONE element.  Also select one element if no maxburst
 	 * is specified.
 	 */
-	if (plchan->cd->single || maxburst == 0) {
-		cctl |= (PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT) |
-			(PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT);
+	if (plchan->cd->single)
+		maxburst = 1;
+
+	burst = pl08x_burst(maxburst);
+	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
+	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
+
+	if (plchan->runtime_direction == DMA_FROM_DEVICE) {
+		plchan->src_addr = config->src_addr;
+		plchan->src_cctl = pl08x_cctl(cctl) | PL080_CONTROL_DST_INCR |
+			pl08x_select_bus(plchan->cd->periph_buses,
+					 pl08x->mem_buses);
 	} else {
-		for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
-			if (burst_sizes[i].burstwords <= maxburst)
-				break;
-		cctl |= burst_sizes[i].reg;
+		plchan->dst_addr = config->dst_addr;
+		plchan->dst_cctl = pl08x_cctl(cctl) | PL080_CONTROL_SRC_INCR |
+			pl08x_select_bus(pl08x->mem_buses,
+					 plchan->cd->periph_buses);
 	}
 
-	plchan->runtime_addr = addr;
-
-	/* Modify the default channel data to fit PrimeCell request */
-	cd->cctl = cctl;
-
 	dev_dbg(&pl08x->adev->dev,
 		"configured channel %s (%s) for %s, data width %d, "
 		"maxburst %d words, LE, CCTL=0x%08x\n",
@@ -1270,23 +1297,6 @@ static int pl08x_prep_channel_resources(struct pl08x_dma_chan *plchan,
 	return 0;
 }
 
-/*
- * Given the source and destination available bus masks, select which
- * will be routed to each port.  We try to have source and destination
- * on separate ports, but always respect the allowable settings.
- */
-static u32 pl08x_select_bus(struct pl08x_driver_data *pl08x, u8 src, u8 dst)
-{
-	u32 cctl = 0;
-
-	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
-		cctl |= PL080_CONTROL_DST_AHB2;
-	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
-		cctl |= PL080_CONTROL_SRC_AHB2;
-
-	return cctl;
-}
-
 static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan,
 	unsigned long flags)
 {
@@ -1338,8 +1348,8 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		txd->cctl |= pl08x_select_bus(pl08x,
-					pl08x->mem_buses, pl08x->mem_buses);
+		txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
+					      pl08x->mem_buses);
 
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
@@ -1356,7 +1366,6 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
-	u8 src_buses, dst_buses;
 	int ret;
 
 	/*
@@ -1390,42 +1399,22 @@ static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
 	txd->direction = direction;
 	txd->len = sgl->length;
 
-	txd->cctl = plchan->cd->cctl &
-			~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
-			  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
-			  PL080_CONTROL_PROT_MASK);
-
-	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
-	txd->cctl |= PL080_CONTROL_PROT_SYS;
-
 	if (direction == DMA_TO_DEVICE) {
 		txd->ccfg |= PL080_FLOW_MEM2PER << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-		txd->cctl |= PL080_CONTROL_SRC_INCR;
+		txd->cctl = plchan->dst_cctl;
 		txd->src_addr = sgl->dma_address;
-		if (plchan->runtime_addr)
-			txd->dst_addr = plchan->runtime_addr;
-		else
-			txd->dst_addr = plchan->cd->addr;
-		src_buses = pl08x->mem_buses;
-		dst_buses = plchan->cd->periph_buses;
+		txd->dst_addr = plchan->dst_addr;
 	} else if (direction == DMA_FROM_DEVICE) {
 		txd->ccfg |= PL080_FLOW_PER2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-		txd->cctl |= PL080_CONTROL_DST_INCR;
-		if (plchan->runtime_addr)
-			txd->src_addr = plchan->runtime_addr;
-		else
-			txd->src_addr = plchan->cd->addr;
+		txd->cctl = plchan->src_cctl;
+		txd->src_addr = plchan->src_addr;
 		txd->dst_addr = sgl->dma_address;
-		src_buses = plchan->cd->periph_buses;
-		dst_buses = pl08x->mem_buses;
 	} else {
 		dev_err(&pl08x->adev->dev,
 			"%s direction unsupported\n", __func__);
 		return NULL;
 	}
 
-	txd->cctl |= pl08x_select_bus(pl08x, src_buses, dst_buses);
-
 	ret = pl08x_prep_channel_resources(plchan, txd);
 	if (ret)
 		return NULL;
@@ -1676,6 +1665,20 @@ static irqreturn_t pl08x_irq(int irq, void *dev)
 	return mask ? IRQ_HANDLED : IRQ_NONE;
 }
 
+static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan)
+{
+	u32 cctl = pl08x_cctl(chan->cd->cctl);
+
+	chan->slave = true;
+	chan->name = chan->cd->bus_id;
+	chan->src_addr = chan->cd->addr;
+	chan->dst_addr = chan->cd->addr;
+	chan->src_cctl = cctl | PL080_CONTROL_DST_INCR |
+		pl08x_select_bus(chan->cd->periph_buses, chan->host->mem_buses);
+	chan->dst_cctl = cctl | PL080_CONTROL_SRC_INCR |
+		pl08x_select_bus(chan->host->mem_buses, chan->cd->periph_buses);
+}
+
 /*
  * Initialise the DMAC memcpy/slave channels.
  * Make a local wrapper to hold required data
@@ -1707,9 +1710,8 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 		chan->state = PL08X_CHAN_IDLE;
 
 		if (slave) {
-			chan->slave = true;
-			chan->name = pl08x->pd->slave_channels[i].bus_id;
 			chan->cd = &pl08x->pd->slave_channels[i];
+			pl08x_dma_slave_init(chan);
 		} else {
 			chan->cd = &pl08x->pd->memcpy_channel;
 			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);

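The burst_sizes[] table above is scanned top-down by the new pl08x_burst()
helper, so the first entry whose burstwords value is less than or equal to
maxburst wins. A worked example, assuming the table exactly as shown:

	/*
	 * pl08x_burst(20): 256 > 20, 128 > 20, 64 > 20, 32 > 20,
	 *                  16 <= 20  -> returns PL080_BSIZE_16
	 * pl08x_burst(1):  nothing matches until the terminal entry
	 *                  (burstwords = 0) -> returns PL080_BSIZE_1
	 */
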
+ 2 - 2
drivers/dma/at_hdmac.c

@@ -1216,7 +1216,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	atdma->dma_common.cap_mask = pdata->cap_mask;
 	atdma->all_chan_mask = (1 << pdata->nr_channels) - 1;
 
-	size = io->end - io->start + 1;
+	size = resource_size(io);
 	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
 		err = -EBUSY;
 		goto err_kfree;
@@ -1362,7 +1362,7 @@ static int __exit at_dma_remove(struct platform_device *pdev)
 	atdma->regs = NULL;
 
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(io->start, io->end - io->start + 1);
+	release_mem_region(io->start, resource_size(io));
 
 	kfree(atdma);
 

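Both hunks above swap an open-coded length computation for resource_size(),
the canonical helper from include/linux/ioport.h, which for an inclusive
[start, end] resource is effectively:

	static inline resource_size_t resource_size(const struct resource *res)
	{
		return res->end - res->start + 1;
	}
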
+ 12 - 7
drivers/dma/coh901318.c

@@ -41,6 +41,8 @@ struct coh901318_desc {
 	struct coh901318_lli *lli;
 	enum dma_data_direction dir;
 	unsigned long flags;
+	u32 head_config;
+	u32 head_ctrl;
 };
 
 struct coh901318_base {
@@ -661,6 +663,9 @@ static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
 
 		coh901318_desc_submit(cohc, cohd);
 
+		/* Program the transaction head */
+		coh901318_set_conf(cohc, cohd->head_config);
+		coh901318_set_ctrl(cohc, cohd->head_ctrl);
 		coh901318_prep_linked_list(cohc, cohd->lli);
 		coh901318_prep_linked_list(cohc, cohd->lli);
 
 		/* start dma job on this channel */
@@ -1091,8 +1096,6 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	} else
 		goto err_direction;
 
-	coh901318_set_conf(cohc, config);
-
 	/* The dma only supports transmitting packages up to
 	 * MAX_DMA_PACKET_SIZE. Calculate to total number of
 	 * dma elemts required to send the entire sg list
@@ -1129,16 +1132,18 @@ coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	if (ret)
 		goto err_lli_fill;
 
-	/*
-	 * Set the default ctrl for the channel to the one from the lli,
-	 * things may have changed due to odd buffer alignment etc.
-	 */
-	coh901318_set_ctrl(cohc, lli->control);
 
 	COH_DBG(coh901318_list_print(cohc, lli));
 
 	/* Pick a descriptor to handle this transfer */
 	cohd = coh901318_desc_get(cohc);
+	cohd->head_config = config;
+	/*
+	 * Set the default head ctrl for the channel to the one from the
+	 * lli, things may have changed due to odd buffer alignment
+	 * etc.
+	 */
+	cohd->head_ctrl = lli->control;
 	cohd->dir = direction;
 	cohd->flags = flags;
 	cohd->desc.tx_submit = coh901318_tx_submit;

+ 2 - 2
drivers/dma/dmaengine.c

@@ -510,8 +510,8 @@ struct dma_chan *__dma_request_channel(dma_cap_mask_t *mask, dma_filter_fn fn, v
 					 dma_chan_name(chan));
 				list_del_rcu(&device->global_node);
 			} else if (err)
-				pr_err("dmaengine: failed to get %s: (%d)\n",
-				       dma_chan_name(chan), err);
+				pr_debug("dmaengine: failed to get %s: (%d)\n",
+					 dma_chan_name(chan), err);
 			else
 				break;
 			if (--device->privatecnt == 0)

+ 1 - 1
drivers/dma/ep93xx_dma.c

@@ -902,7 +902,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
  *
  * Returns a valid DMA descriptor or %NULL in case of failure.
  */
-struct dma_async_tx_descriptor *
+static struct dma_async_tx_descriptor *
 ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
 			   dma_addr_t src, size_t len, unsigned long flags)
 {

+ 3 - 1
drivers/dma/imx-sdma.c

@@ -1305,8 +1305,10 @@ static int __init sdma_probe(struct platform_device *pdev)
 		goto err_request_irq;
 
 	sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL);
-	if (!sdma->script_addrs)
+	if (!sdma->script_addrs) {
+		ret = -ENOMEM;
 		goto err_alloc;
 		goto err_alloc;
 
 	if (of_id)
 		pdev->id_entry = of_id->data;
 		pdev->id_entry = of_id->data;

+ 0 - 2
drivers/dma/intel_mid_dma.c

@@ -1351,7 +1351,6 @@ int dma_suspend(struct pci_dev *pci, pm_message_t state)
 			return -EAGAIN;
 	}
 	device->state = SUSPENDED;
-	pci_set_drvdata(pci, device);
 	pci_save_state(pci);
 	pci_disable_device(pci);
 	pci_set_power_state(pci, PCI_D3hot);
@@ -1380,7 +1379,6 @@ int dma_resume(struct pci_dev *pci)
 	}
 	device->state = RUNNING;
 	iowrite32(REG_BIT0, device->dma_base + DMA_CFG);
-	pci_set_drvdata(pci, device);
 	return 0;
 }
 

+ 2 - 4
drivers/dma/ipu/ipu_idmac.c

@@ -1706,16 +1706,14 @@ static int __init ipu_probe(struct platform_device *pdev)
 		ipu_data.irq_fn, ipu_data.irq_err, ipu_data.irq_base);
 
 	/* Remap IPU common registers */
-	ipu_data.reg_ipu = ioremap(mem_ipu->start,
-				   mem_ipu->end - mem_ipu->start + 1);
+	ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu));
 	if (!ipu_data.reg_ipu) {
 		ret = -ENOMEM;
 		goto err_ioremap_ipu;
 	}
 
 	/* Remap Image Converter and Image DMA Controller registers */
-	ipu_data.reg_ic = ioremap(mem_ic->start,
-				  mem_ic->end - mem_ic->start + 1);
+	ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic));
 	if (!ipu_data.reg_ic) {
 		ret = -ENOMEM;
 		goto err_ioremap_ic;

+ 2 - 1
drivers/dma/mv_xor.c

@@ -1304,7 +1304,8 @@ static int mv_xor_shared_probe(struct platform_device *pdev)
 	if (!res)
 		return -ENODEV;
 
-	msp->xor_base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+				     resource_size(res));
 	if (!msp->xor_base)
 		return -EBUSY;
 

+ 9 - 4
drivers/dma/mxs-dma.c

@@ -327,10 +327,12 @@ static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
 
 	memset(mxs_chan->ccw, 0, PAGE_SIZE);
 
-	ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
-				0, "mxs-dma", mxs_dma);
-	if (ret)
-		goto err_irq;
+	if (mxs_chan->chan_irq != NO_IRQ) {
+		ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+					0, "mxs-dma", mxs_dma);
+		if (ret)
+			goto err_irq;
+	}
 
 	ret = clk_enable(mxs_dma->clk);
 	if (ret)
@@ -535,6 +537,7 @@ static int mxs_dma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	switch (cmd) {
 	case DMA_TERMINATE_ALL:
 		mxs_dma_disable_chan(mxs_chan);
+		mxs_dma_reset_chan(mxs_chan);
 		break;
 	case DMA_PAUSE:
 		mxs_dma_pause_chan(mxs_chan);
@@ -707,6 +710,8 @@ static struct platform_device_id mxs_dma_type[] = {
 	}, {
 		.name = "mxs-dma-apbx",
 		.driver_data = MXS_DMA_APBX,
+	}, {
+		/* end of list */
 	}
 };
 

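The "end of list" entry added above is the conventional all-zero sentinel that
terminates a platform_device_id table, so code walking the array knows where
it ends. The general shape, with hypothetical names:

	static struct platform_device_id my_ids[] = {
		{ .name = "my-dev-v1", .driver_data = TYPE_V1 },
		{ .name = "my-dev-v2", .driver_data = TYPE_V2 },
		{ /* sentinel */ }
	};
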
+ 94 - 33
drivers/dma/pch_dma.c

@@ -45,7 +45,8 @@
 #define DMA_STATUS_MASK_BITS		0x3
 #define DMA_STATUS_SHIFT_BITS		16
 #define DMA_STATUS_IRQ(x)		(0x1 << (x))
-#define DMA_STATUS_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS0_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS2_ERR(x)		(0x1 << (x))
 
 #define DMA_DESC_WIDTH_SHIFT_BITS	12
 #define DMA_DESC_WIDTH_1_BYTE		(0x3 << DMA_DESC_WIDTH_SHIFT_BITS)
@@ -61,6 +62,9 @@
 
 #define MAX_CHAN_NR			8
 
+#define DMA_MASK_CTL0_MODE	0x33333333
+#define DMA_MASK_CTL2_MODE	0x00003333
+
 static unsigned int init_nr_desc_per_channel = 64;
 module_param(init_nr_desc_per_channel, uint, 0644);
 MODULE_PARM_DESC(init_nr_desc_per_channel,
@@ -133,6 +137,7 @@ struct pch_dma {
 #define PCH_DMA_CTL3	0x0C
 #define PCH_DMA_STS0	0x10
 #define PCH_DMA_STS1	0x14
+#define PCH_DMA_STS2	0x18
 
 #define dma_readl(pd, name) \
 	readl((pd)->membase + PCH_DMA_##name)
@@ -183,13 +188,19 @@ static void pdc_enable_irq(struct dma_chan *chan, int enable)
 {
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	int pos;
+
+	if (chan->chan_id < 8)
+		pos = chan->chan_id;
+	else
+		pos = chan->chan_id + 8;
 
 	val = dma_readl(pd, CTL2);
 
 	if (enable)
-		val |= 0x1 << chan->chan_id;
+		val |= 0x1 << pos;
 	else
-		val &= ~(0x1 << chan->chan_id);
+		val &= ~(0x1 << pos);
 
 	dma_writel(pd, CTL2, val);
 
@@ -202,10 +213,17 @@ static void pdc_set_dir(struct dma_chan *chan)
 	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	u32 mask_mode;
+	u32 mask_ctl;
 
 	if (chan->chan_id < 8) {
 		val = dma_readl(pd, CTL0);
 
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+					(DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_mode;
 		if (pd_chan->dir == DMA_TO_DEVICE)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 				       DMA_CTL0_DIR_SHIFT_BITS);
@@ -213,18 +231,24 @@ static void pdc_set_dir(struct dma_chan *chan)
 			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
 					 DMA_CTL0_DIR_SHIFT_BITS));
 
+		val |= mask_ctl;
 		dma_writel(pd, CTL0, val);
 	} else {
 		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
 		val = dma_readl(pd, CTL3);
 
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+						(DMA_CTL0_BITS_PER_CH * ch);
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_mode;
 		if (pd_chan->dir == DMA_TO_DEVICE)
 			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 				       DMA_CTL0_DIR_SHIFT_BITS);
 		else
 			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
 					 DMA_CTL0_DIR_SHIFT_BITS));
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL3, val);
 	}
 
@@ -236,33 +260,37 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode)
 {
 	struct pch_dma *pd = to_pd(chan->device);
 	u32 val;
+	u32 mask_ctl;
+	u32 mask_dir;
 
 	if (chan->chan_id < 8) {
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+			   (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
 		val = dma_readl(pd, CTL0);
-
-		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
-			(DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_dir;
 		val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL0, val);
 	} else {
 		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
-
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
 		val = dma_readl(pd, CTL3);
-
-		val &= ~(DMA_CTL0_MODE_MASK_BITS <<
-			(DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_dir;
 		val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
-
+		val |= mask_ctl;
 		dma_writel(pd, CTL3, val);
-
 	}
 
 	dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
 		chan->chan_id, val);
 }
 
-static u32 pdc_get_status(struct pch_dma_chan *pd_chan)
+static u32 pdc_get_status0(struct pch_dma_chan *pd_chan)
 {
 	struct pch_dma *pd = to_pd(pd_chan->chan.device);
 	u32 val;
@@ -272,9 +300,27 @@ static u32 pdc_get_status(struct pch_dma_chan *pd_chan)
 			DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id));
 }
 
+static u32 pdc_get_status2(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma *pd = to_pd(pd_chan->chan.device);
+	u32 val;
+
+	val = dma_readl(pd, STS2);
+	return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+			DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8)));
+}
+
 static bool pdc_is_idle(struct pch_dma_chan *pd_chan)
 {
-	if (pdc_get_status(pd_chan) == DMA_STATUS_IDLE)
+	u32 sts;
+
+	if (pd_chan->chan.chan_id < 8)
+		sts = pdc_get_status0(pd_chan);
+	else
+		sts = pdc_get_status2(pd_chan);
+
+
+	if (sts == DMA_STATUS_IDLE)
 		return true;
 	else
 		return false;
@@ -495,11 +541,11 @@ static int pd_alloc_chan_resources(struct dma_chan *chan)
 		list_add_tail(&desc->desc_node, &tmp_list);
 	}
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	list_splice(&tmp_list, &pd_chan->free_list);
 	pd_chan->descs_allocated = i;
 	pd_chan->completed_cookie = chan->cookie = 1;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 	pdc_enable_irq(chan, 1);
 
@@ -517,10 +563,10 @@ static void pd_free_chan_resources(struct dma_chan *chan)
 	BUG_ON(!list_empty(&pd_chan->active_list));
 	BUG_ON(!list_empty(&pd_chan->active_list));
 	BUG_ON(!list_empty(&pd_chan->queue));
 	BUG_ON(!list_empty(&pd_chan->queue));
 
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	list_splice_init(&pd_chan->free_list, &tmp_list);
 	list_splice_init(&pd_chan->free_list, &tmp_list);
 	pd_chan->descs_allocated = 0;
 	pd_chan->descs_allocated = 0;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 
 	list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
 	list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
 		pci_pool_free(pd->pool, desc, desc->txd.phys);
 		pci_pool_free(pd->pool, desc, desc->txd.phys);
@@ -536,10 +582,10 @@ static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	dma_cookie_t last_completed;
 	dma_cookie_t last_completed;
 	int ret;
 	int ret;
 
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 	last_completed = pd_chan->completed_cookie;
 	last_completed = pd_chan->completed_cookie;
 	last_used = chan->cookie;
 	last_used = chan->cookie;
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 
 	ret = dma_async_is_complete(cookie, last_completed, last_used);
 	ret = dma_async_is_complete(cookie, last_completed, last_used);
 
 
@@ -654,7 +700,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	if (cmd != DMA_TERMINATE_ALL)
 	if (cmd != DMA_TERMINATE_ALL)
 		return -ENXIO;
 		return -ENXIO;
 
 
-	spin_lock_bh(&pd_chan->lock);
+	spin_lock_irq(&pd_chan->lock);
 
 
 	pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
 	pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
 
 
@@ -664,7 +710,7 @@ static int pd_device_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	list_for_each_entry_safe(desc, _d, &list, desc_node)
 	list_for_each_entry_safe(desc, _d, &list, desc_node)
 		pdc_chain_complete(pd_chan, desc);
 		pdc_chain_complete(pd_chan, desc);
 
 
-	spin_unlock_bh(&pd_chan->lock);
+	spin_unlock_irq(&pd_chan->lock);
 
 
 	return 0;
 	return 0;
 }
 }
@@ -693,30 +739,45 @@ static irqreturn_t pd_irq(int irq, void *devid)
 	struct pch_dma *pd = (struct pch_dma *)devid;
 	struct pch_dma *pd = (struct pch_dma *)devid;
 	struct pch_dma_chan *pd_chan;
 	struct pch_dma_chan *pd_chan;
 	u32 sts0;
 	u32 sts0;
+	u32 sts2;
 	int i;
 	int i;
-	int ret = IRQ_NONE;
+	int ret0 = IRQ_NONE;
+	int ret2 = IRQ_NONE;
 
 
 	sts0 = dma_readl(pd, STS0);
 	sts0 = dma_readl(pd, STS0);
+	sts2 = dma_readl(pd, STS2);
 
 
 	dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
 	dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
 
 
 	for (i = 0; i < pd->dma.chancnt; i++) {
 	for (i = 0; i < pd->dma.chancnt; i++) {
 		pd_chan = &pd->channels[i];
 		pd_chan = &pd->channels[i];
 
 
-		if (sts0 & DMA_STATUS_IRQ(i)) {
-			if (sts0 & DMA_STATUS_ERR(i))
-				set_bit(0, &pd_chan->err_status);
+		if (i < 8) {
+			if (sts0 & DMA_STATUS_IRQ(i)) {
+				if (sts0 & DMA_STATUS0_ERR(i))
+					set_bit(0, &pd_chan->err_status);
 
 
-			tasklet_schedule(&pd_chan->tasklet);
-			ret = IRQ_HANDLED;
-		}
+				tasklet_schedule(&pd_chan->tasklet);
+				ret0 = IRQ_HANDLED;
+			}
+		} else {
+			if (sts2 & DMA_STATUS_IRQ(i - 8)) {
+				if (sts2 & DMA_STATUS2_ERR(i))
+					set_bit(0, &pd_chan->err_status);
 
 
+				tasklet_schedule(&pd_chan->tasklet);
+				ret2 = IRQ_HANDLED;
+			}
+		}
 	}
 	}
 
 
 	/* clear interrupt bits in status register */
 	/* clear interrupt bits in status register */
-	dma_writel(pd, STS0, sts0);
+	if (ret0)
+		dma_writel(pd, STS0, sts0);
+	if (ret2)
+		dma_writel(pd, STS2, sts2);
 
 
-	return ret;
+	return ret0 | ret2;
 }
 }
 
 
 #ifdef	CONFIG_PM
 #ifdef	CONFIG_PM

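The pch_dma hunks above split per-channel state across two register pairs: ch0-ch7 keep using CTL0/STS0 while ch8-ch11 move to CTL3/STS2, and the channel lock switches from spin_lock_bh() to spin_lock_irq(), which also excludes hard-irq context rather than only softirqs. A minimal sketch of the register-split dispatch, assuming 4 status bits per channel purely for illustration (the driver's real shifts come from its DMA_STATUS_* macros):

	/* Illustrative only: pick the status word by channel index. */
	static u32 chan_status(unsigned int chan_id, u32 sts0, u32 sts2)
	{
		if (chan_id < 8)	/* ch0-ch7 are reported in STS0 */
			return (sts0 >> (chan_id * 4)) & 0xf;
		/* ch8-ch11 are reported in STS2, re-indexed from zero */
		return (sts2 >> ((chan_id - 8) * 4)) & 0xf;
	}
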
+ 36 - 28
drivers/dma/pl330.c

@@ -82,7 +82,7 @@ struct dma_pl330_dmac {
 	spinlock_t pool_lock;

 	/* Peripheral channels connected to this DMAC */
-	struct dma_pl330_chan peripherals[0]; /* keep at end */
+	struct dma_pl330_chan *peripherals; /* keep at end */
 };

 struct dma_pl330_desc {
@@ -451,8 +451,13 @@ static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
 	desc->txd.cookie = 0;
 	async_tx_ack(&desc->txd);

-	desc->req.rqtype = peri->rqtype;
-	desc->req.peri = peri->peri_id;
+	if (peri) {
+		desc->req.rqtype = peri->rqtype;
+		desc->req.peri = peri->peri_id;
+	} else {
+		desc->req.rqtype = MEMTOMEM;
+		desc->req.peri = 0;
+	}

 	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);

@@ -529,10 +534,10 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
 	struct pl330_info *pi;
 	int burst;

-	if (unlikely(!pch || !len || !peri))
+	if (unlikely(!pch || !len))
 		return NULL;

-	if (peri->rqtype != MEMTOMEM)
+	if (peri && peri->rqtype != MEMTOMEM)
 		return NULL;

 	pi = &pch->dmac->pif;
@@ -577,7 +582,7 @@ pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	int i, burst_size;
 	dma_addr_t addr;

-	if (unlikely(!pch || !sgl || !sg_len))
+	if (unlikely(!pch || !sgl || !sg_len || !peri))
 		return NULL;

 	/* Make sure the direction is consistent */
@@ -666,17 +671,12 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	struct dma_device *pd;
 	struct resource *res;
 	int i, ret, irq;
+	int num_chan;

 	pdat = adev->dev.platform_data;

-	if (!pdat || !pdat->nr_valid_peri) {
-		dev_err(&adev->dev, "platform data missing\n");
-		return -ENODEV;
-	}
-
 	/* Allocate a new DMAC and its Channels */
-	pdmac = kzalloc(pdat->nr_valid_peri * sizeof(*pch)
-				+ sizeof(*pdmac), GFP_KERNEL);
+	pdmac = kzalloc(sizeof(*pdmac), GFP_KERNEL);
 	if (!pdmac) {
 		dev_err(&adev->dev, "unable to allocate mem\n");
 		return -ENOMEM;
@@ -685,7 +685,7 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	pi = &pdmac->pif;
 	pi->dev = &adev->dev;
 	pi->pl330_data = NULL;
-	pi->mcbufsz = pdat->mcbuf_sz;
+	pi->mcbufsz = pdat ? pdat->mcbuf_sz : 0;

 	res = &adev->res;
 	request_mem_region(res->start, resource_size(res), "dma-pl330");
@@ -717,27 +717,35 @@ pl330_probe(struct amba_device *adev, const struct amba_id *id)
 	INIT_LIST_HEAD(&pd->channels);

 	/* Initialize channel parameters */
-	for (i = 0; i < pdat->nr_valid_peri; i++) {
-		struct dma_pl330_peri *peri = &pdat->peri[i];
-		pch = &pdmac->peripherals[i];
+	num_chan = max(pdat ? pdat->nr_valid_peri : 0, (u8)pi->pcfg.num_chan);
+	pdmac->peripherals = kzalloc(num_chan * sizeof(*pch), GFP_KERNEL);

-		switch (peri->rqtype) {
-		case MEMTOMEM:
+	for (i = 0; i < num_chan; i++) {
+		pch = &pdmac->peripherals[i];
+		if (pdat) {
+			struct dma_pl330_peri *peri = &pdat->peri[i];
+
+			switch (peri->rqtype) {
+			case MEMTOMEM:
+				dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+				break;
+			case MEMTODEV:
+			case DEVTOMEM:
+				dma_cap_set(DMA_SLAVE, pd->cap_mask);
+				break;
+			default:
+				dev_err(&adev->dev, "DEVTODEV Not Supported\n");
+				continue;
+			}
+			pch->chan.private = peri;
+		} else {
 			dma_cap_set(DMA_MEMCPY, pd->cap_mask);
-			break;
-		case MEMTODEV:
-		case DEVTOMEM:
-			dma_cap_set(DMA_SLAVE, pd->cap_mask);
-			break;
-		default:
-			dev_err(&adev->dev, "DEVTODEV Not Supported\n");
-			continue;
+			pch->chan.private = NULL;
 		}

 		INIT_LIST_HEAD(&pch->work_list);
 		spin_lock_init(&pch->lock);
 		pch->pl330_chid = NULL;
-		pch->chan.private = peri;
 		pch->chan.device = pd;
 		pch->chan.chan_id = i;
 		pch->dmac = pdmac;

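In the pl330 changes above, the channel array stops being a flexible array member sized by platform data and becomes a separately allocated pointer, so the driver can still probe, memcpy-only, when no platform data is supplied. A sketch of the resulting two-step allocation pattern, with illustrative names; note that each allocation now has to be checked and freed on its own:

	struct dmac {
		/* ... */
		struct chan *channels;	/* was: struct chan channels[0]; */
	};

	dmac = kzalloc(sizeof(*dmac), GFP_KERNEL);
	if (!dmac)
		return -ENOMEM;
	dmac->channels = kcalloc(num_chan, sizeof(*dmac->channels), GFP_KERNEL);
	if (!dmac->channels) {
		kfree(dmac);
		return -ENOMEM;
	}
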
+ 155 - 115
drivers/dma/ste_dma40.c

@@ -14,6 +14,7 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/amba/bus.h>

 #include <plat/ste_dma40.h>

@@ -45,9 +46,6 @@
 #define D40_ALLOC_PHY		(1 << 30)
 #define D40_ALLOC_LOG_FREE	0

-/* Hardware designer of the block */
-#define D40_HW_DESIGNER 0x8
-
 /**
  * enum 40_command - The different commands and/or statuses.
  *
@@ -186,6 +184,8 @@ struct d40_base;
  * @log_def: Default logical channel settings.
  * @lcla: Space for one dst src pair for logical channel transfers.
  * @lcpa: Pointer to dst and src lcpa settings.
+ * @runtime_addr: runtime configured address.
+ * @runtime_direction: runtime configured direction.
  *
  * This struct can either "be" a logical or a physical channel.
  */
@@ -200,6 +200,7 @@ struct d40_chan {
 	struct dma_chan			 chan;
 	struct tasklet_struct		 tasklet;
 	struct list_head		 client;
+	struct list_head		 pending_queue;
 	struct list_head		 active;
 	struct list_head		 queue;
 	struct stedma40_chan_cfg	 dma_cfg;
@@ -645,7 +646,20 @@ static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)

 static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
 {
-	list_add_tail(&desc->node, &d40c->queue);
+	list_add_tail(&desc->node, &d40c->pending_queue);
+}
+
+static struct d40_desc *d40_first_pending(struct d40_chan *d40c)
+{
+	struct d40_desc *d;
+
+	if (list_empty(&d40c->pending_queue))
+		return NULL;
+
+	d = list_first_entry(&d40c->pending_queue,
+			     struct d40_desc,
+			     node);
+	return d;
 }

 static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
@@ -802,6 +816,11 @@ static void d40_term_all(struct d40_chan *d40c)
 		d40_desc_free(d40c, d40d);
 	}

+	/* Release pending descriptors */
+	while ((d40d = d40_first_pending(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}

 	d40c->pending_tx = 0;
 	d40c->busy = false;
@@ -2092,7 +2111,7 @@ dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
 	struct scatterlist *sg;
 	int i;

-	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_KERNEL);
+	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
 	for (i = 0; i < periods; i++) {
 		sg_dma_address(&sg[i]) = dma_addr;
 		sg_dma_len(&sg[i]) = period_len;
@@ -2152,24 +2171,87 @@ static void d40_issue_pending(struct dma_chan *chan)

 	spin_lock_irqsave(&d40c->lock, flags);

-	/* Busy means that pending jobs are already being processed */
+	list_splice_tail_init(&d40c->pending_queue, &d40c->queue);
+
+	/* Busy means that queued jobs are already being processed */
 	if (!d40c->busy)
 		(void) d40_queue_start(d40c);

 	spin_unlock_irqrestore(&d40c->lock, flags);
 }

+static int
+dma40_config_to_halfchannel(struct d40_chan *d40c,
+			    struct stedma40_half_channel_info *info,
+			    enum dma_slave_buswidth width,
+			    u32 maxburst)
+{
+	enum stedma40_periph_data_width addr_width;
+	int psize;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		addr_width = STEDMA40_BYTE_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		addr_width = STEDMA40_HALFWORD_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		addr_width = STEDMA40_WORD_WIDTH;
+		break;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		addr_width = STEDMA40_DOUBLEWORD_WIDTH;
+		break;
+	default:
+		dev_err(d40c->base->dev,
+			"illegal peripheral address width "
+			"requested (%d)\n",
+			width);
+		return -EINVAL;
+	}
+
+	if (chan_is_logical(d40c)) {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_LOG_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_LOG_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_LOG_4;
+		else
+			psize = STEDMA40_PSIZE_LOG_1;
+	} else {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_PHY_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_PHY_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_PHY_4;
+		else
+			psize = STEDMA40_PSIZE_PHY_1;
+	}
+
+	info->data_width = addr_width;
+	info->psize = psize;
+	info->flow_ctrl = STEDMA40_NO_FLOW_CTRL;
+
+	return 0;
+}
+
 /* Runtime reconfiguration extension */
-static void d40_set_runtime_config(struct dma_chan *chan,
-			       struct dma_slave_config *config)
+static int d40_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
 {
 	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
 	struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
-	enum dma_slave_buswidth config_addr_width;
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
 	dma_addr_t config_addr;
-	u32 config_maxburst;
-	enum stedma40_periph_data_width addr_width;
-	int psize;
+	u32 src_maxburst, dst_maxburst;
+	int ret;
+
+	src_addr_width = config->src_addr_width;
+	src_maxburst = config->src_maxburst;
+	dst_addr_width = config->dst_addr_width;
+	dst_maxburst = config->dst_maxburst;

 	if (config->direction == DMA_FROM_DEVICE) {
 		dma_addr_t dev_addr_rx =
@@ -2188,8 +2270,11 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 				cfg->dir);
 		cfg->dir = STEDMA40_PERIPH_TO_MEM;

-		config_addr_width = config->src_addr_width;
-		config_maxburst = config->src_maxburst;
+		/* Configure the memory side */
+		if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			dst_addr_width = src_addr_width;
+		if (dst_maxburst == 0)
+			dst_maxburst = src_maxburst;

 	} else if (config->direction == DMA_TO_DEVICE) {
 		dma_addr_t dev_addr_tx =
@@ -2208,68 +2293,39 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 				cfg->dir);
 		cfg->dir = STEDMA40_MEM_TO_PERIPH;

-		config_addr_width = config->dst_addr_width;
-		config_maxburst = config->dst_maxburst;
-
+		/* Configure the memory side */
+		if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			src_addr_width = dst_addr_width;
+		if (src_maxburst == 0)
+			src_maxburst = dst_maxburst;
 	} else {
 		dev_err(d40c->base->dev,
 			"unrecognized channel direction %d\n",
 			config->direction);
-		return;
+		return -EINVAL;
 	}

-	switch (config_addr_width) {
-	case DMA_SLAVE_BUSWIDTH_1_BYTE:
-		addr_width = STEDMA40_BYTE_WIDTH;
-		break;
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		addr_width = STEDMA40_HALFWORD_WIDTH;
-		break;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		addr_width = STEDMA40_WORD_WIDTH;
-		break;
-	case DMA_SLAVE_BUSWIDTH_8_BYTES:
-		addr_width = STEDMA40_DOUBLEWORD_WIDTH;
-		break;
-	default:
+	if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) {
 		dev_err(d40c->base->dev,
-			"illegal peripheral address width "
-			"requested (%d)\n",
-			config->src_addr_width);
-		return;
+			"src/dst width/maxburst mismatch: %d*%d != %d*%d\n",
+			src_maxburst,
+			src_addr_width,
+			dst_maxburst,
+			dst_addr_width);
+		return -EINVAL;
 	}

-	if (chan_is_logical(d40c)) {
-		if (config_maxburst >= 16)
-			psize = STEDMA40_PSIZE_LOG_16;
-		else if (config_maxburst >= 8)
-			psize = STEDMA40_PSIZE_LOG_8;
-		else if (config_maxburst >= 4)
-			psize = STEDMA40_PSIZE_LOG_4;
-		else
-			psize = STEDMA40_PSIZE_LOG_1;
-	} else {
-		if (config_maxburst >= 16)
-			psize = STEDMA40_PSIZE_PHY_16;
-		else if (config_maxburst >= 8)
-			psize = STEDMA40_PSIZE_PHY_8;
-		else if (config_maxburst >= 4)
-			psize = STEDMA40_PSIZE_PHY_4;
-		else if (config_maxburst >= 2)
-			psize = STEDMA40_PSIZE_PHY_2;
-		else
-			psize = STEDMA40_PSIZE_PHY_1;
-	}
+	ret = dma40_config_to_halfchannel(d40c, &cfg->src_info,
+					  src_addr_width,
+					  src_maxburst);
+	if (ret)
+		return ret;

-	/* Set up all the endpoint configs */
-	cfg->src_info.data_width = addr_width;
-	cfg->src_info.psize = psize;
-	cfg->src_info.big_endian = false;
-	cfg->src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
-	cfg->dst_info.data_width = addr_width;
-	cfg->dst_info.psize = psize;
-	cfg->dst_info.big_endian = false;
-	cfg->dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL;
+	ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info,
+					  dst_addr_width,
+					  dst_maxburst);
+	if (ret)
+		return ret;

 	/* Fill in register values */
 	if (chan_is_logical(d40c))
@@ -2282,12 +2338,14 @@ static void d40_set_runtime_config(struct dma_chan *chan,
 	d40c->runtime_addr = config_addr;
 	d40c->runtime_direction = config->direction;
 	dev_dbg(d40c->base->dev,
-		"configured channel %s for %s, data width %d, "
-		"maxburst %d bytes, LE, no flow control\n",
+		"configured channel %s for %s, data width %d/%d, "
+		"maxburst %d/%d elements, LE, no flow control\n",
 		dma_chan_name(chan),
 		(config->direction == DMA_FROM_DEVICE) ? "RX" : "TX",
-		config_addr_width,
-		config_maxburst);
+		src_addr_width, dst_addr_width,
+		src_maxburst, dst_maxburst);
+
+	return 0;
 }

 static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -2308,9 +2366,8 @@ static int d40_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
 	case DMA_RESUME:
 		return d40_resume(d40c);
 	case DMA_SLAVE_CONFIG:
-		d40_set_runtime_config(chan,
+		return d40_set_runtime_config(chan,
 			(struct dma_slave_config *) arg);
-		return 0;
 	default:
 		break;
 	}
@@ -2341,6 +2398,7 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,

 		INIT_LIST_HEAD(&d40c->active);
 		INIT_LIST_HEAD(&d40c->queue);
+		INIT_LIST_HEAD(&d40c->pending_queue);
 		INIT_LIST_HEAD(&d40c->client);

 		tasklet_init(&d40c->tasklet, dma_tasklet,
@@ -2502,25 +2560,6 @@ static int __init d40_phy_res_init(struct d40_base *base)

 static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 {
-	static const struct d40_reg_val dma_id_regs[] = {
-		/* Peripheral Id */
-		{ .reg = D40_DREG_PERIPHID0, .val = 0x0040},
-		{ .reg = D40_DREG_PERIPHID1, .val = 0x0000},
-		/*
-		 * D40_DREG_PERIPHID2 Depends on HW revision:
-		 *  DB8500ed has 0x0008,
-		 *  ? has 0x0018,
-		 *  DB8500v1 has 0x0028
-		 *  DB8500v2 has 0x0038
-		 */
-		{ .reg = D40_DREG_PERIPHID3, .val = 0x0000},
-
-		/* PCell Id */
-		{ .reg = D40_DREG_CELLID0, .val = 0x000d},
-		{ .reg = D40_DREG_CELLID1, .val = 0x00f0},
-		{ .reg = D40_DREG_CELLID2, .val = 0x0005},
-		{ .reg = D40_DREG_CELLID3, .val = 0x00b1}
-	};
 	struct stedma40_platform_data *plat_data;
 	struct clk *clk = NULL;
 	void __iomem *virtbase = NULL;
@@ -2529,8 +2568,9 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	int num_log_chans = 0;
 	int num_phy_chans;
 	int i;
-	u32 val;
-	u32 rev;
+	u32 pid;
+	u32 cid;
+	u8 rev;

 	clk = clk_get(&pdev->dev, NULL);

@@ -2554,32 +2594,32 @@ static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
 	if (!virtbase)
 		goto failure;

-	/* HW version check */
-	for (i = 0; i < ARRAY_SIZE(dma_id_regs); i++) {
-		if (dma_id_regs[i].val !=
-		    readl(virtbase + dma_id_regs[i].reg)) {
-			d40_err(&pdev->dev,
-				"Unknown hardware! Expected 0x%x at 0x%x but got 0x%x\n",
-				dma_id_regs[i].val,
-				dma_id_regs[i].reg,
-				readl(virtbase + dma_id_regs[i].reg));
-			goto failure;
-		}
-	}
+	/* This is just a regular AMBA PrimeCell ID actually */
+	for (pid = 0, i = 0; i < 4; i++)
+		pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i)
+			& 255) << (i * 8);
+	for (cid = 0, i = 0; i < 4; i++)
+		cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i)
+			& 255) << (i * 8);

-	/* Get silicon revision and designer */
-	val = readl(virtbase + D40_DREG_PERIPHID2);
-
-	if ((val & D40_DREG_PERIPHID2_DESIGNER_MASK) !=
-	    D40_HW_DESIGNER) {
+	if (cid != AMBA_CID) {
+		d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n");
+		goto failure;
+	}
+	if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) {
 		d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
-			val & D40_DREG_PERIPHID2_DESIGNER_MASK,
-			D40_HW_DESIGNER);
+			AMBA_MANF_BITS(pid),
+			AMBA_VENDOR_ST);
 		goto failure;
 	}
-
-	rev = (val & D40_DREG_PERIPHID2_REV_MASK) >>
-		D40_DREG_PERIPHID2_REV_POS;
+	/*
+	 * HW revision:
+	 * DB8500ed has revision 0
+	 * ? has revision 1
+	 * DB8500v1 has revision 2
+	 * DB8500v2 has revision 3
+	 */
+	rev = AMBA_REV_BITS(pid);

 	/* The number of physical channels on this HW */
 	num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;

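The ste_dma40 identification rework above drops the hard-coded PERIPHID/CELLID table in favour of reading the generic AMBA PrimeCell ID: four byte-wide registers at the top of the peripheral's window each contribute eight bits of a 32-bit ID. A sketch of that assembly, assuming base and size map the whole register window (offset 0x20 from the end for the peripheral ID, 0x10 for the cell ID, matching the loops above):

	static u32 read_primecell_id(void __iomem *base, resource_size_t size,
				     unsigned int offset)
	{
		u32 id = 0;
		int i;

		for (i = 0; i < 4; i++)
			id |= (readl(base + size - offset + 4 * i) & 0xff)
				<< (i * 8);
		return id;
	}
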
+ 0 - 3
drivers/dma/ste_dma40_ll.h

@@ -184,9 +184,6 @@
 #define D40_DREG_PERIPHID0	0xFE0
 #define D40_DREG_PERIPHID1	0xFE4
 #define D40_DREG_PERIPHID2	0xFE8
-#define D40_DREG_PERIPHID2_REV_POS 4
-#define D40_DREG_PERIPHID2_REV_MASK (0xf << D40_DREG_PERIPHID2_REV_POS)
-#define D40_DREG_PERIPHID2_DESIGNER_MASK 0xf
 #define D40_DREG_PERIPHID3	0xFEC
 #define D40_DREG_CELLID0	0xFF0
 #define D40_DREG_CELLID1	0xFF4

+ 231 - 12
drivers/firmware/efivars.c

@@ -78,6 +78,7 @@
 #include <linux/kobject.h>
 #include <linux/device.h>
 #include <linux/slab.h>
+#include <linux/pstore.h>

 #include <asm/uaccess.h>

@@ -89,6 +90,8 @@ MODULE_DESCRIPTION("sysfs interface to EFI Variables");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(EFIVARS_VERSION);

+#define DUMP_NAME_LEN 52
+
 /*
  * The maximum size of VariableName + Data = 1024
  * Therefore, it's reasonable to save that much
@@ -119,6 +122,10 @@ struct efivar_attribute {
 	ssize_t (*store)(struct efivar_entry *entry, const char *buf, size_t count);
 };

+#define PSTORE_EFI_ATTRIBUTES \
+	(EFI_VARIABLE_NON_VOLATILE | \
+	 EFI_VARIABLE_BOOTSERVICE_ACCESS | \
+	 EFI_VARIABLE_RUNTIME_ACCESS)

 #define EFIVAR_ATTR(_name, _mode, _show, _store) \
 struct efivar_attribute efivar_attr_##_name = { \
@@ -141,38 +148,72 @@ efivar_create_sysfs_entry(struct efivars *efivars,

 /* Return the number of unicode characters in data */
 static unsigned long
-utf8_strlen(efi_char16_t *data, unsigned long maxlength)
+utf16_strnlen(efi_char16_t *s, size_t maxlength)
 {
 	unsigned long length = 0;

-	while (*data++ != 0 && length < maxlength)
+	while (*s++ != 0 && length < maxlength)
 		length++;
 	return length;
 }

+static unsigned long
+utf16_strlen(efi_char16_t *s)
+{
+	return utf16_strnlen(s, ~0UL);
+}
+
 /*
  * Return the number of bytes is the length of this string
  * Note: this is NOT the same as the number of unicode characters
  */
 static inline unsigned long
-utf8_strsize(efi_char16_t *data, unsigned long maxlength)
+utf16_strsize(efi_char16_t *data, unsigned long maxlength)
 {
-	return utf8_strlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t);
+	return utf16_strnlen(data, maxlength/sizeof(efi_char16_t)) * sizeof(efi_char16_t);
+}
+
+static inline int
+utf16_strncmp(const efi_char16_t *a, const efi_char16_t *b, size_t len)
+{
+	while (1) {
+		if (len == 0)
+			return 0;
+		if (*a < *b)
+			return -1;
+		if (*a > *b)
+			return 1;
+		if (*a == 0) /* implies *b == 0 */
+			return 0;
+		a++;
+		b++;
+		len--;
+	}
 }

 static efi_status_t
-get_var_data(struct efivars *efivars, struct efi_variable *var)
+get_var_data_locked(struct efivars *efivars, struct efi_variable *var)
 {
 	efi_status_t status;

-	spin_lock(&efivars->lock);
 	var->DataSize = 1024;
 	status = efivars->ops->get_variable(var->VariableName,
 					    &var->VendorGuid,
 					    &var->Attributes,
 					    &var->DataSize,
 					    var->Data);
+	return status;
+}
+
+static efi_status_t
+get_var_data(struct efivars *efivars, struct efi_variable *var)
+{
+	efi_status_t status;
+
+	spin_lock(&efivars->lock);
+	status = get_var_data_locked(efivars, var);
 	spin_unlock(&efivars->lock);
+
 	if (status != EFI_SUCCESS) {
 		printk(KERN_WARNING "efivars: get_variable() failed 0x%lx!\n",
 			status);
@@ -387,12 +428,180 @@ static struct kobj_type efivar_ktype = {
 	.default_attrs = def_attrs,
 };

+static struct pstore_info efi_pstore_info;
+
 static inline void
 efivar_unregister(struct efivar_entry *var)
 {
 	kobject_put(&var->kobj);
 }

+#ifdef CONFIG_PSTORE
+
+static int efi_pstore_open(struct pstore_info *psi)
+{
+	struct efivars *efivars = psi->data;
+
+	spin_lock(&efivars->lock);
+	efivars->walk_entry = list_first_entry(&efivars->list,
+					       struct efivar_entry, list);
+	return 0;
+}
+
+static int efi_pstore_close(struct pstore_info *psi)
+{
+	struct efivars *efivars = psi->data;
+
+	spin_unlock(&efivars->lock);
+	return 0;
+}
+
+static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
+			       struct timespec *timespec, struct pstore_info *psi)
+{
+	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
+	struct efivars *efivars = psi->data;
+	char name[DUMP_NAME_LEN];
+	int i;
+	unsigned int part, size;
+	unsigned long time;
+
+	while (&efivars->walk_entry->list != &efivars->list) {
+		if (!efi_guidcmp(efivars->walk_entry->var.VendorGuid,
+				 vendor)) {
+			for (i = 0; i < DUMP_NAME_LEN; i++) {
+				name[i] = efivars->walk_entry->var.VariableName[i];
+			}
+			if (sscanf(name, "dump-type%u-%u-%lu", type, &part, &time) == 3) {
+				*id = part;
+				timespec->tv_sec = time;
+				timespec->tv_nsec = 0;
+				get_var_data_locked(efivars, &efivars->walk_entry->var);
+				size = efivars->walk_entry->var.DataSize;
+				memcpy(psi->buf, efivars->walk_entry->var.Data, size);
+				efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
+					           struct efivar_entry, list);
+				return size;
+			}
+		}
+		efivars->walk_entry = list_entry(efivars->walk_entry->list.next,
+						 struct efivar_entry, list);
+	}
+	return 0;
+}
+
+static u64 efi_pstore_write(enum pstore_type_id type, unsigned int part,
+			    size_t size, struct pstore_info *psi)
+{
+	char name[DUMP_NAME_LEN];
+	char stub_name[DUMP_NAME_LEN];
+	efi_char16_t efi_name[DUMP_NAME_LEN];
+	efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
+	struct efivars *efivars = psi->data;
+	struct efivar_entry *entry, *found = NULL;
+	int i;
+
+	sprintf(stub_name, "dump-type%u-%u-", type, part);
+	sprintf(name, "%s%lu", stub_name, get_seconds());
+
+	spin_lock(&efivars->lock);
+
+	for (i = 0; i < DUMP_NAME_LEN; i++)
+		efi_name[i] = stub_name[i];
+
+	/*
+	 * Clean up any entries with the same name
+	 */
+
+	list_for_each_entry(entry, &efivars->list, list) {
+		get_var_data_locked(efivars, &entry->var);
+
+		if (efi_guidcmp(entry->var.VendorGuid, vendor))
+			continue;
+		if (utf16_strncmp(entry->var.VariableName, efi_name,
+				  utf16_strlen(efi_name)))
+			continue;
+		/* Needs to be a prefix */
+		if (entry->var.VariableName[utf16_strlen(efi_name)] == 0)
+			continue;
+
+		/* found */
+		found = entry;
+		efivars->ops->set_variable(entry->var.VariableName,
+					   &entry->var.VendorGuid,
+					   PSTORE_EFI_ATTRIBUTES,
+					   0, NULL);
+	}
+
+	if (found)
+		list_del(&found->list);
+
+	for (i = 0; i < DUMP_NAME_LEN; i++)
+		efi_name[i] = name[i];
+
+	efivars->ops->set_variable(efi_name, &vendor, PSTORE_EFI_ATTRIBUTES,
+				   size, psi->buf);
+
+	spin_unlock(&efivars->lock);
+
+	if (found)
+		efivar_unregister(found);
+
+	if (size)
+		efivar_create_sysfs_entry(efivars,
+					  utf16_strsize(efi_name,
+							DUMP_NAME_LEN * 2),
+					  efi_name, &vendor);
+
+	return part;
+};
+
+static int efi_pstore_erase(enum pstore_type_id type, u64 id,
+			    struct pstore_info *psi)
+{
+	efi_pstore_write(type, id, 0, psi);
+
+	return 0;
+}
+#else
+static int efi_pstore_open(struct pstore_info *psi)
+{
+	return 0;
+}
+
+static int efi_pstore_close(struct pstore_info *psi)
+{
+	return 0;
+}
+
+static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
+			       struct timespec *time, struct pstore_info *psi)
+{
+	return -1;
+}
+
+static u64 efi_pstore_write(enum pstore_type_id type, int part, size_t size,
+			    struct pstore_info *psi)
+{
+	return 0;
+}
+
+static int efi_pstore_erase(enum pstore_type_id type, u64 id,
+			    struct pstore_info *psi)
+{
+	return 0;
+}
+#endif
+
+static struct pstore_info efi_pstore_info = {
+	.owner		= THIS_MODULE,
+	.name		= "efi",
+	.open		= efi_pstore_open,
+	.close		= efi_pstore_close,
+	.read		= efi_pstore_read,
+	.write		= efi_pstore_write,
+	.erase		= efi_pstore_erase,
+};

 static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
@@ -414,8 +623,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 	 * Does this variable already exist?
 	 */
 	list_for_each_entry_safe(search_efivar, n, &efivars->list, list) {
-		strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024);
-		strsize2 = utf8_strsize(new_var->VariableName, 1024);
+		strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024);
+		strsize2 = utf16_strsize(new_var->VariableName, 1024);
 		if (strsize1 == strsize2 &&
 			!memcmp(&(search_efivar->var.VariableName),
 				new_var->VariableName, strsize1) &&
@@ -447,8 +656,8 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,

 	/* Create the entry in sysfs.  Locking is not required here */
 	status = efivar_create_sysfs_entry(efivars,
-					   utf8_strsize(new_var->VariableName,
-							1024),
+					   utf16_strsize(new_var->VariableName,
+							 1024),
 					   new_var->VariableName,
 					   &new_var->VendorGuid);
 	if (status) {
@@ -477,8 +686,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 	 * Does this variable already exist?
 	 */
 	list_for_each_entry_safe(search_efivar, n, &efivars->list, list) {
-		strsize1 = utf8_strsize(search_efivar->var.VariableName, 1024);
-		strsize2 = utf8_strsize(del_var->VariableName, 1024);
+		strsize1 = utf16_strsize(search_efivar->var.VariableName, 1024);
+		strsize2 = utf16_strsize(del_var->VariableName, 1024);
 		if (strsize1 == strsize2 &&
 			!memcmp(&(search_efivar->var.VariableName),
 				del_var->VariableName, strsize1) &&
@@ -763,6 +972,16 @@ int register_efivars(struct efivars *efivars,
 	if (error)
 		unregister_efivars(efivars);

+	efivars->efi_pstore_info = efi_pstore_info;
+
+	efivars->efi_pstore_info.buf = kmalloc(4096, GFP_KERNEL);
+	if (efivars->efi_pstore_info.buf) {
+		efivars->efi_pstore_info.bufsize = 1024;
+		efivars->efi_pstore_info.data = efivars;
+		mutex_init(&efivars->efi_pstore_info.buf_mutex);
+		pstore_register(&efivars->efi_pstore_info);
+	}
+
 out:
 	kfree(variable_name);


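Two threads run through the efivars changes above: the string helpers are renamed from utf8_* to utf16_* (EFI variable names are arrays of efi_char16_t, so the old prefix was a misnomer), and a pstore backend is added whose open/read/close callbacks hold efivars->lock across the whole list walk, which is why get_var_data_locked() exists. A sketch of the prefix match that efi_pstore_write() uses to find stale dump variables, with var_name standing in for an assumed entry name:

	/* Illustrative: does var_name start with "dump-"? */
	efi_char16_t prefix[] = { 'd', 'u', 'm', 'p', '-', 0 };

	if (!utf16_strncmp(var_name, prefix, utf16_strlen(prefix)))
		/* var_name carries the "dump-" prefix */;
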
+ 110 - 80
drivers/regulator/core.c

@@ -20,6 +20,7 @@
 #include <linux/debugfs.h>
 #include <linux/device.h>
 #include <linux/slab.h>
+#include <linux/async.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/suspend.h>
@@ -33,6 +34,8 @@

 #include "dummy.h"

+#define rdev_crit(rdev, fmt, ...)					\
+	pr_crit("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
 #define rdev_err(rdev, fmt, ...)					\
 	pr_err("%s: " fmt, rdev_get_name(rdev), ##__VA_ARGS__)
 #define rdev_warn(rdev, fmt, ...)					\
@@ -78,11 +81,13 @@ struct regulator {
 	char *supply_name;
 	struct device_attribute dev_attr;
 	struct regulator_dev *rdev;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs;
+#endif
 };

 static int _regulator_is_enabled(struct regulator_dev *rdev);
-static int _regulator_disable(struct regulator_dev *rdev,
-		struct regulator_dev **supply_rdev_ptr);
+static int _regulator_disable(struct regulator_dev *rdev);
 static int _regulator_get_voltage(struct regulator_dev *rdev);
 static int _regulator_get_current_limit(struct regulator_dev *rdev);
 static unsigned int _regulator_get_mode(struct regulator_dev *rdev);
@@ -90,6 +95,9 @@ static void _notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data);
 static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 				     int min_uV, int max_uV);
+static struct regulator *create_regulator(struct regulator_dev *rdev,
+					  struct device *dev,
+					  const char *supply_name);

 static const char *rdev_get_name(struct regulator_dev *rdev)
 {
@@ -143,8 +151,11 @@ static int regulator_check_voltage(struct regulator_dev *rdev,
 	if (*min_uV < rdev->constraints->min_uV)
 		*min_uV = rdev->constraints->min_uV;

-	if (*min_uV > *max_uV)
+	if (*min_uV > *max_uV) {
+		rdev_err(rdev, "unsupportable voltage range: %d-%duV\n",
+			 *min_uV, *max_uV);
 		return -EINVAL;
+	}

 	return 0;
 }
@@ -197,8 +208,11 @@ static int regulator_check_current_limit(struct regulator_dev *rdev,
 	if (*min_uA < rdev->constraints->min_uA)
 		*min_uA = rdev->constraints->min_uA;

-	if (*min_uA > *max_uA)
+	if (*min_uA > *max_uA) {
+		rdev_err(rdev, "unsupportable current range: %d-%duA\n",
+			 *min_uA, *max_uA);
 		return -EINVAL;
+	}

 	return 0;
 }
@@ -213,6 +227,7 @@ static int regulator_mode_constrain(struct regulator_dev *rdev, int *mode)
 	case REGULATOR_MODE_STANDBY:
 		break;
 	default:
+		rdev_err(rdev, "invalid mode %x specified\n", *mode);
 		return -EINVAL;
 	}

@@ -779,7 +794,6 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
 		if (ret < 0) {
 			rdev_err(rdev, "failed to apply %duV constraint\n",
 				 rdev->constraints->min_uV);
-			rdev->constraints = NULL;
 			return ret;
 		}
 	}
@@ -882,7 +896,6 @@ static int set_machine_constraints(struct regulator_dev *rdev,
 		ret = suspend_prepare(rdev, rdev->constraints->initial_state);
 		if (ret < 0) {
 			rdev_err(rdev, "failed to set suspend state\n");
-			rdev->constraints = NULL;
 			goto out;
 		}
 	}
@@ -909,13 +922,15 @@ static int set_machine_constraints(struct regulator_dev *rdev,
 		ret = ops->enable(rdev);
 		if (ret < 0) {
 			rdev_err(rdev, "failed to enable\n");
-			rdev->constraints = NULL;
 			goto out;
 		}
 	}

 	print_constraints(rdev);
+	return 0;
 out:
+	kfree(rdev->constraints);
+	rdev->constraints = NULL;
 	return ret;
 }

@@ -929,21 +944,20 @@ out:
 * core if it's child is enabled.
 */
 static int set_supply(struct regulator_dev *rdev,
-	struct regulator_dev *supply_rdev)
+		      struct regulator_dev *supply_rdev)
 {
 	int err;

-	err = sysfs_create_link(&rdev->dev.kobj, &supply_rdev->dev.kobj,
-				"supply");
-	if (err) {
-		rdev_err(rdev, "could not add device link %s err %d\n",
-			 supply_rdev->dev.kobj.name, err);
-		       goto out;
+	rdev_info(rdev, "supplied by %s\n", rdev_get_name(supply_rdev));
+
+	rdev->supply = create_regulator(supply_rdev, &rdev->dev, "SUPPLY");
+	if (IS_ERR(rdev->supply)) {
+		err = PTR_ERR(rdev->supply);
+		rdev->supply = NULL;
+		return err;
 	}
-	rdev->supply = supply_rdev;
-	list_add(&rdev->slist, &supply_rdev->supply_list);
-out:
-	return err;
+
+	return 0;
 }

 /**
@@ -1032,7 +1046,7 @@ static void unset_regulator_supplies(struct regulator_dev *rdev)
 	}
 }

-#define REG_STR_SIZE	32
+#define REG_STR_SIZE	64

 static struct regulator *create_regulator(struct regulator_dev *rdev,
 					  struct device *dev,
@@ -1052,8 +1066,9 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,

 	if (dev) {
 		/* create a 'requested_microamps_name' sysfs entry */
-		size = scnprintf(buf, REG_STR_SIZE, "microamps_requested_%s",
-			supply_name);
+		size = scnprintf(buf, REG_STR_SIZE,
+				 "microamps_requested_%s-%s",
+				 dev_name(dev), supply_name);
 		if (size >= REG_STR_SIZE)
 			goto overflow_err;

@@ -1088,7 +1103,28 @@ static struct regulator *create_regulator(struct regulator_dev *rdev,
 				  dev->kobj.name, err);
 			goto link_name_err;
 		}
+	} else {
+		regulator->supply_name = kstrdup(supply_name, GFP_KERNEL);
+		if (regulator->supply_name == NULL)
+			goto attr_err;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	regulator->debugfs = debugfs_create_dir(regulator->supply_name,
+						rdev->debugfs);
+	if (IS_ERR_OR_NULL(regulator->debugfs)) {
+		rdev_warn(rdev, "Failed to create debugfs directory\n");
+		regulator->debugfs = NULL;
+	} else {
+		debugfs_create_u32("uA_load", 0444, regulator->debugfs,
+				   &regulator->uA_load);
+		debugfs_create_u32("min_uV", 0444, regulator->debugfs,
+				   &regulator->min_uV);
+		debugfs_create_u32("max_uV", 0444, regulator->debugfs,
+				   &regulator->max_uV);
 	}
+#endif
+
 	mutex_unlock(&rdev->mutex);
 	return regulator;
 link_name_err:
@@ -1267,13 +1303,17 @@ void regulator_put(struct regulator *regulator)
 	mutex_lock(&regulator_list_mutex);
 	rdev = regulator->rdev;

+#ifdef CONFIG_DEBUG_FS
+	debugfs_remove_recursive(regulator->debugfs);
+#endif
+
 	/* remove any sysfs entries */
 	if (regulator->dev) {
 		sysfs_remove_link(&rdev->dev.kobj, regulator->supply_name);
-		kfree(regulator->supply_name);
 		device_remove_file(regulator->dev, &regulator->dev_attr);
 		kfree(regulator->dev_attr.attr.name);
 	}
+	kfree(regulator->supply_name);
 	list_del(&regulator->list);
 	kfree(regulator);

@@ -1301,19 +1341,6 @@ static int _regulator_enable(struct regulator_dev *rdev)
 {
 	int ret, delay;

-	if (rdev->use_count == 0) {
-		/* do we need to enable the supply regulator first */
-		if (rdev->supply) {
-			mutex_lock(&rdev->supply->mutex);
-			ret = _regulator_enable(rdev->supply);
-			mutex_unlock(&rdev->supply->mutex);
-			if (ret < 0) {
-				rdev_err(rdev, "failed to enable: %d\n", ret);
-				return ret;
-			}
-		}
-	}
-
 	/* check voltage and requested load before enabling */
 	if (rdev->constraints &&
 	    (rdev->constraints->valid_ops_mask & REGULATOR_CHANGE_DRMS))
@@ -1388,19 +1415,27 @@ int regulator_enable(struct regulator *regulator)
 	struct regulator_dev *rdev = regulator->rdev;
 	int ret = 0;

+	if (rdev->supply) {
+		ret = regulator_enable(rdev->supply);
+		if (ret != 0)
+			return ret;
+	}
+
 	mutex_lock(&rdev->mutex);
 	ret = _regulator_enable(rdev);
 	mutex_unlock(&rdev->mutex);
+
+	if (ret != 0)
+		regulator_disable(rdev->supply);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(regulator_enable);

 /* locks held by regulator_disable() */
-static int _regulator_disable(struct regulator_dev *rdev,
-		struct regulator_dev **supply_rdev_ptr)
+static int _regulator_disable(struct regulator_dev *rdev)
 {
 	int ret = 0;
-	*supply_rdev_ptr = NULL;

 	if (WARN(rdev->use_count <= 0,
 		 "unbalanced disables for %s\n", rdev_get_name(rdev)))
@@ -1427,9 +1462,6 @@ static int _regulator_disable(struct regulator_dev *rdev,
 					     NULL);
 		}

-		/* decrease our supplies ref count and disable if required */
-		*supply_rdev_ptr = rdev->supply;
-
 		rdev->use_count = 0;
 	} else if (rdev->use_count > 1) {

@@ -1440,6 +1472,7 @@ static int _regulator_disable(struct regulator_dev *rdev,

 		rdev->use_count--;
 	}
+
 	return ret;
 }

@@ -1458,29 +1491,21 @@ static int _regulator_disable(struct regulator_dev *rdev,
 int regulator_disable(struct regulator *regulator)
 {
 	struct regulator_dev *rdev = regulator->rdev;
-	struct regulator_dev *supply_rdev = NULL;
 	int ret = 0;

 	mutex_lock(&rdev->mutex);
-	ret = _regulator_disable(rdev, &supply_rdev);
+	ret = _regulator_disable(rdev);
 	mutex_unlock(&rdev->mutex);

-	/* decrease our supplies ref count and disable if required */
-	while (supply_rdev != NULL) {
-		rdev = supply_rdev;
-
-		mutex_lock(&rdev->mutex);
-		_regulator_disable(rdev, &supply_rdev);
-		mutex_unlock(&rdev->mutex);
-	}
+	if (ret == 0 && rdev->supply)
+		regulator_disable(rdev->supply);

 	return ret;
 }
 EXPORT_SYMBOL_GPL(regulator_disable);

 /* locks held by regulator_force_disable() */
-static int _regulator_force_disable(struct regulator_dev *rdev,
-		struct regulator_dev **supply_rdev_ptr)
+static int _regulator_force_disable(struct regulator_dev *rdev)
 {
 	int ret = 0;

@@ -1497,10 +1522,6 @@ static int _regulator_force_disable(struct regulator_dev *rdev,
 			REGULATOR_EVENT_DISABLE, NULL);
 	}

-	/* decrease our supplies ref count and disable if required */
-	*supply_rdev_ptr = rdev->supply;
-
-	rdev->use_count = 0;
 	return ret;
 }

@@ -1516,16 +1537,16 @@ static int _regulator_force_disable(struct regulator_dev *rdev,
 int regulator_force_disable(struct regulator *regulator)
 {
 	struct regulator_dev *rdev = regulator->rdev;
-	struct regulator_dev *supply_rdev = NULL;
 	int ret;

 	mutex_lock(&rdev->mutex);
 	regulator->uA_load = 0;
-	ret = _regulator_force_disable(rdev, &supply_rdev);
+	ret = _regulator_force_disable(regulator->rdev);
 	mutex_unlock(&rdev->mutex);

-	if (supply_rdev)
-		regulator_disable(get_device_regulator(rdev_get_dev(supply_rdev)));
+	if (rdev->supply)
+		while (rdev->open_count--)
+			regulator_disable(rdev->supply);

 	return ret;
 }
@@ -2136,7 +2157,7 @@ int regulator_set_optimum_mode(struct regulator *regulator, int uA_load)
 	/* get input voltage */
 	input_uV = 0;
 	if (rdev->supply)
-		input_uV = _regulator_get_voltage(rdev->supply);
+		input_uV = regulator_get_voltage(rdev->supply);
 	if (input_uV <= 0)
 		input_uV = rdev->constraints->input_uV;
 	if (input_uV <= 0) {
@@ -2206,17 +2227,8 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier);
 static void _notifier_call_chain(struct regulator_dev *rdev,
 				  unsigned long event, void *data)
 {
-	struct regulator_dev *_rdev;
-
 	/* call rdev chain first */
 	blocking_notifier_call_chain(&rdev->notifier, event, NULL);
-
-	/* now notify regulator we supply */
-	list_for_each_entry(_rdev, &rdev->supply_list, slist) {
-		mutex_lock(&_rdev->mutex);
-		_notifier_call_chain(_rdev, event, data);
-		mutex_unlock(&_rdev->mutex);
-	}
 }

 /**
@@ -2264,6 +2276,13 @@ err:
 }
 EXPORT_SYMBOL_GPL(regulator_bulk_get);

+static void regulator_bulk_enable_async(void *data, async_cookie_t cookie)
+{
+	struct regulator_bulk_data *bulk = data;
+
+	bulk->ret = regulator_enable(bulk->consumer);
+}
+
 /**
  * regulator_bulk_enable - enable multiple regulator consumers
  *
@@ -2279,21 +2298,33 @@ EXPORT_SYMBOL_GPL(regulator_bulk_get);
 int regulator_bulk_enable(int num_consumers,
 			  struct regulator_bulk_data *consumers)
 {
+	LIST_HEAD(async_domain);
 	int i;
-	int ret;
+	int ret = 0;
+
+	for (i = 0; i < num_consumers; i++)
+		async_schedule_domain(regulator_bulk_enable_async,
+				      &consumers[i], &async_domain);
+
+	async_synchronize_full_domain(&async_domain);

+	/* If any consumer failed we need to unwind any that succeeded */
 	for (i = 0; i < num_consumers; i++) {
-		ret = regulator_enable(consumers[i].consumer);
-		if (ret != 0)
+		if (consumers[i].ret != 0) {
+			ret = consumers[i].ret;
 			goto err;
+		}
 	}

 	return 0;

 err:
-	pr_err("Failed to enable %s: %d\n", consumers[i].supply, ret);
-	for (--i; i >= 0; --i)
-		regulator_disable(consumers[i].consumer);
+	for (i = 0; i < num_consumers; i++)
+		if (consumers[i].ret == 0)
+			regulator_disable(consumers[i].consumer);
+		else
+			pr_err("Failed to enable %s: %d\n",
+			       consumers[i].supply, consumers[i].ret);

 	return ret;
 }
@@ -2589,9 +2620,7 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
 	rdev->owner = regulator_desc->owner;
 	rdev->desc = regulator_desc;
 	INIT_LIST_HEAD(&rdev->consumer_list);
-	INIT_LIST_HEAD(&rdev->supply_list);
 	INIT_LIST_HEAD(&rdev->list);
-	INIT_LIST_HEAD(&rdev->slist);
 	BLOCKING_INIT_NOTIFIER_HEAD(&rdev->notifier);

 	/* preform any regulator specific init */
@@ -2672,6 +2701,7 @@ unset_supplies:
 	unset_regulator_supplies(rdev);

 scrub:
+	kfree(rdev->constraints);
 	device_unregister(&rdev->dev);
 	/* device core frees rdev */
 	rdev = ERR_PTR(ret);
@@ -2703,7 +2733,7 @@ void regulator_unregister(struct regulator_dev *rdev)
 	unset_regulator_supplies(rdev);
 	list_del(&rdev->list);
 	if (rdev->supply)
-		sysfs_remove_link(&rdev->dev.kobj, "supply");
+		regulator_put(rdev->supply);
 	device_unregister(&rdev->dev);
 	kfree(rdev->constraints);
 	mutex_unlock(&regulator_list_mutex);

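The regulator core rework above turns a supply into an ordinary consumer: rdev->supply is now a struct regulator created against the parent, so enable and disable simply recurse through regulator_enable(rdev->supply), and regulator_bulk_enable() launches all enables in parallel through an async domain before collecting per-consumer results. A condensed sketch of that async pattern, mirroring the hunk above (needs <linux/async.h>; names are illustrative):

	static void job(void *data, async_cookie_t cookie)
	{
		struct regulator_bulk_data *bulk = data;

		bulk->ret = regulator_enable(bulk->consumer);
	}

	static int enable_all(struct regulator_bulk_data *consumers, int n)
	{
		LIST_HEAD(domain);
		int i;

		for (i = 0; i < n; i++)
			async_schedule_domain(job, &consumers[i], &domain);
		async_synchronize_full_domain(&domain);

		for (i = 0; i < n; i++)	/* collect per-consumer results */
			if (consumers[i].ret)
				return consumers[i].ret;
		return 0;
	}
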
+ 26 - 6
drivers/regulator/dummy.c

@@ -36,6 +36,29 @@ static struct regulator_desc dummy_desc = {
 	.ops = &dummy_ops,
 };

+static int __devinit dummy_regulator_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	dummy_regulator_rdev = regulator_register(&dummy_desc, NULL,
+						  &dummy_initdata, NULL);
+	if (IS_ERR(dummy_regulator_rdev)) {
+		ret = PTR_ERR(dummy_regulator_rdev);
+		pr_err("Failed to register regulator: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static struct platform_driver dummy_regulator_driver = {
+	.probe		= dummy_regulator_probe,
+	.driver		= {
+		.name		= "reg-dummy",
+		.owner		= THIS_MODULE,
+	},
+};
+
 static struct platform_device *dummy_pdev;

 void __init regulator_dummy_init(void)
@@ -55,12 +78,9 @@ void __init regulator_dummy_init(void)
 		return;
 	}

-	dummy_regulator_rdev = regulator_register(&dummy_desc, NULL,
-						  &dummy_initdata, NULL);
-	if (IS_ERR(dummy_regulator_rdev)) {
-		ret = PTR_ERR(dummy_regulator_rdev);
-		pr_err("Failed to register regulator: %d\n", ret);
+	ret = platform_driver_register(&dummy_regulator_driver);
+	if (ret != 0) {
+		pr_err("Failed to register dummy regulator driver: %d\n", ret);
 		platform_device_unregister(dummy_pdev);
-		return;
 	}
 }

+ 51 - 12
drivers/regulator/tps65910-regulator.c

@@ -49,7 +49,6 @@
 #define TPS65911_REG_LDO7		11
 #define TPS65911_REG_LDO7		11
 #define TPS65911_REG_LDO8		12
 #define TPS65911_REG_LDO8		12
 
 
-#define TPS65910_NUM_REGULATOR		13
 #define TPS65910_SUPPLY_STATE_ENABLED	0x1
 #define TPS65910_SUPPLY_STATE_ENABLED	0x1
 
 
 /* supported VIO voltages in milivolts */
 /* supported VIO voltages in milivolts */
@@ -264,11 +263,12 @@ static struct tps_info tps65911_regs[] = {
 };
 };
 
 
 struct tps65910_reg {
 struct tps65910_reg {
-	struct regulator_desc desc[TPS65910_NUM_REGULATOR];
+	struct regulator_desc *desc;
 	struct tps65910 *mfd;
 	struct tps65910 *mfd;
-	struct regulator_dev *rdev[TPS65910_NUM_REGULATOR];
-	struct tps_info *info[TPS65910_NUM_REGULATOR];
+	struct regulator_dev **rdev;
+	struct tps_info **info;
 	struct mutex mutex;
 	struct mutex mutex;
+	int num_regulators;
 	int mode;
 	int mode;
 	int  (*get_ctrl_reg)(int);
 	int  (*get_ctrl_reg)(int);
 };
 };
@@ -759,8 +759,13 @@ static int tps65910_list_voltage_dcdc(struct regulator_dev *dev,
 		mult = (selector / VDD1_2_NUM_VOLTS) + 1;
 		mult = (selector / VDD1_2_NUM_VOLTS) + 1;
 		volt = VDD1_2_MIN_VOLT +
 		volt = VDD1_2_MIN_VOLT +
 				(selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET;
 				(selector % VDD1_2_NUM_VOLTS) * VDD1_2_OFFSET;
+		break;
 	case TPS65911_REG_VDDCTRL:
 	case TPS65911_REG_VDDCTRL:
 		volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET);
 		volt = VDDCTRL_MIN_VOLT + (selector * VDDCTRL_OFFSET);
+		break;
+	default:
+		BUG();
+		return -EINVAL;
 	}
 	}
 
 
 	return  volt * 100 * mult;
 	return  volt * 100 * mult;
@@ -897,16 +902,42 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 	switch(tps65910_chip_id(tps65910)) {
 	case TPS65910:
 		pmic->get_ctrl_reg = &tps65910_get_ctrl_register;
+		pmic->num_regulators = ARRAY_SIZE(tps65910_regs);
 		info = tps65910_regs;
+		break;
 	case TPS65911:
 		pmic->get_ctrl_reg = &tps65911_get_ctrl_register;
+		pmic->num_regulators = ARRAY_SIZE(tps65911_regs);
 		info = tps65911_regs;
+		break;
 	default:
 		pr_err("Invalid tps chip version\n");
+		kfree(pmic);
 		return -ENODEV;
 	}
 
-	for (i = 0; i < TPS65910_NUM_REGULATOR; i++, info++, reg_data++) {
+	pmic->desc = kcalloc(pmic->num_regulators,
+			sizeof(struct regulator_desc), GFP_KERNEL);
+	if (!pmic->desc) {
+		err = -ENOMEM;
+		goto err_free_pmic;
+	}
+
+	pmic->info = kcalloc(pmic->num_regulators,
+			sizeof(struct tps_info *), GFP_KERNEL);
+	if (!pmic->info) {
+		err = -ENOMEM;
+		goto err_free_desc;
+	}
+
+	pmic->rdev = kcalloc(pmic->num_regulators,
+			sizeof(struct regulator_dev *), GFP_KERNEL);
+	if (!pmic->rdev) {
+		err = -ENOMEM;
+		goto err_free_info;
+	}
+
+	for (i = 0; i < pmic->num_regulators; i++, info++, reg_data++) {
 		/* Register the regulators */
 		pmic->info[i] = info;
 
@@ -938,7 +969,7 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 				"failed to register %s regulator\n",
 				pdev->name);
 			err = PTR_ERR(rdev);
-			goto err;
+			goto err_unregister_regulator;
 		}
 
 		/* Save regulator for cleanup */
@@ -946,23 +977,31 @@ static __devinit int tps65910_probe(struct platform_device *pdev)
 	}
 	return 0;
 
-err:
+err_unregister_regulator:
 	while (--i >= 0)
 		regulator_unregister(pmic->rdev[i]);
-
+	kfree(pmic->rdev);
+err_free_info:
+	kfree(pmic->info);
+err_free_desc:
+	kfree(pmic->desc);
+err_free_pmic:
 	kfree(pmic);
 	return err;
 }
 
 static int __devexit tps65910_remove(struct platform_device *pdev)
 {
-	struct tps65910_reg *tps65910_reg = platform_get_drvdata(pdev);
+	struct tps65910_reg *pmic = platform_get_drvdata(pdev);
 	int i;
 
-	for (i = 0; i < TPS65910_NUM_REGULATOR; i++)
-		regulator_unregister(tps65910_reg->rdev[i]);
+	for (i = 0; i < pmic->num_regulators; i++)
+		regulator_unregister(pmic->rdev[i]);
 
-	kfree(tps65910_reg);
+	kfree(pmic->rdev);
+	kfree(pmic->info);
+	kfree(pmic->desc);
+	kfree(pmic);
 	return 0;
 }
 

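The tps65910 probe rework follows the usual kernel idiom for multi-step allocation: each kcalloc() gets a matching label in the error path, so a failure unwinds exactly the allocations made so far, and remove() frees the same set. Schematically (fields a and b are placeholders, not the driver's own):

	a = kcalloc(n, sizeof(*a), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	b = kcalloc(n, sizeof(*b), GFP_KERNEL);
	if (!b) {
		err = -ENOMEM;
		goto err_free_a;
	}

	return 0;

err_free_a:
	kfree(a);
	return err;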
+ 31 - 35
drivers/regulator/twl-regulator.c

@@ -835,8 +835,8 @@ static struct regulator_ops twlsmps_ops = {
 			remap_conf) \
 		TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
 			remap_conf, TWL4030, twl4030fixed_ops)
-#define TWL6030_FIXED_LDO(label, offset, mVolts, num, turnon_delay) \
-		TWL_FIXED_LDO(label, offset, mVolts, num, turnon_delay, \
+#define TWL6030_FIXED_LDO(label, offset, mVolts, turnon_delay) \
+		TWL_FIXED_LDO(label, offset, mVolts, 0x0, turnon_delay, \
 			0x0, TWL6030, twl6030fixed_ops)
 
 #define TWL4030_ADJUSTABLE_LDO(label, offset, num, turnon_delay, remap_conf) { \
@@ -856,24 +856,22 @@ static struct regulator_ops twlsmps_ops = {
 		}, \
 	}
 
-#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \
+#define TWL6030_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
 	.base = offset, \
-	.id = num, \
 	.min_mV = min_mVolts, \
 	.max_mV = max_mVolts, \
 	.desc = { \
 		.name = #label, \
 		.id = TWL6030_REG_##label, \
-		.n_voltages = (max_mVolts - min_mVolts)/100, \
+		.n_voltages = (max_mVolts - min_mVolts)/100 + 1, \
 		.ops = &twl6030ldo_ops, \
 		.type = REGULATOR_VOLTAGE, \
 		.owner = THIS_MODULE, \
 		}, \
 	}
 
-#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts, num) { \
+#define TWL6025_ADJUSTABLE_LDO(label, offset, min_mVolts, max_mVolts) { \
 	.base = offset, \
-	.id = num, \
 	.min_mV = min_mVolts, \
 	.max_mV = max_mVolts, \
 	.desc = { \
@@ -903,9 +901,8 @@ static struct regulator_ops twlsmps_ops = {
 		}, \
 	}
 
-#define TWL6030_FIXED_RESOURCE(label, offset, num, turnon_delay) { \
+#define TWL6030_FIXED_RESOURCE(label, offset, turnon_delay) { \
 	.base = offset, \
-	.id = num, \
 	.delay = turnon_delay, \
 	.desc = { \
 		.name = #label, \
@@ -916,9 +913,8 @@ static struct regulator_ops twlsmps_ops = {
 		}, \
 	}
 
-#define TWL6025_ADJUSTABLE_SMPS(label, offset, num) { \
+#define TWL6025_ADJUSTABLE_SMPS(label, offset) { \
 	.base = offset, \
-	.id = num, \
 	.min_mV = 600, \
 	.max_mV = 2100, \
 	.desc = { \
@@ -961,32 +957,32 @@ static struct twlreg_info twl_regs[] = {
 	/* 6030 REG with base as PMC Slave Misc : 0x0030 */
 	/* Turnon-delay and remap configuration values for 6030 are not
 	   verified since the specification is not public */
-	TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300, 1),
-	TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300, 2),
-	TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300, 3),
-	TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300, 4),
-	TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300, 5),
-	TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300, 7),
-	TWL6030_FIXED_LDO(VANA, 0x50, 2100, 15, 0),
-	TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 16, 0),
-	TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 17, 0),
-	TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 18, 0),
-	TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 48, 0),
+	TWL6030_ADJUSTABLE_LDO(VAUX1_6030, 0x54, 1000, 3300),
+	TWL6030_ADJUSTABLE_LDO(VAUX2_6030, 0x58, 1000, 3300),
+	TWL6030_ADJUSTABLE_LDO(VAUX3_6030, 0x5c, 1000, 3300),
+	TWL6030_ADJUSTABLE_LDO(VMMC, 0x68, 1000, 3300),
+	TWL6030_ADJUSTABLE_LDO(VPP, 0x6c, 1000, 3300),
+	TWL6030_ADJUSTABLE_LDO(VUSIM, 0x74, 1000, 3300),
+	TWL6030_FIXED_LDO(VANA, 0x50, 2100, 0),
+	TWL6030_FIXED_LDO(VCXIO, 0x60, 1800, 0),
+	TWL6030_FIXED_LDO(VDAC, 0x64, 1800, 0),
+	TWL6030_FIXED_LDO(VUSB, 0x70, 3300, 0),
+	TWL6030_FIXED_RESOURCE(CLK32KG, 0x8C, 0),
 
 	/* 6025 are renamed compared to 6030 versions */
-	TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300, 1),
-	TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300, 2),
-	TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300, 3),
-	TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300, 4),
-	TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300, 5),
-	TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300, 7),
-	TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300, 16),
-	TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300, 17),
-	TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300, 18),
-
-	TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34, 1),
-	TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10, 2),
-	TWL6025_ADJUSTABLE_SMPS(VIO, 0x16, 3),
+	TWL6025_ADJUSTABLE_LDO(LDO2, 0x54, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO4, 0x58, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO3, 0x5c, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO5, 0x68, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO1, 0x6c, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO7, 0x74, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDO6, 0x60, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDOLN, 0x64, 1000, 3300),
+	TWL6025_ADJUSTABLE_LDO(LDOUSB, 0x70, 1000, 3300),
+
+	TWL6025_ADJUSTABLE_SMPS(SMPS3, 0x34),
+	TWL6025_ADJUSTABLE_SMPS(SMPS4, 0x10),
+	TWL6025_ADJUSTABLE_SMPS(VIO, 0x16),
 };
 
 static u8 twl_get_smps_offset(void)

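The n_voltages change in the TWL6030 LDO macro is an off-by-one fix: a voltage list that includes both endpoints has (max - min)/step + 1 entries, not (max - min)/step. Checking against the 1000..3300 mV LDOs registered in the table above:

	(3300 - 1000)/100     = 23	/* old value: 3300 mV unselectable */
	(3300 - 1000)/100 + 1 = 24	/* every 100 mV step, both ends */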
+ 65 - 61
drivers/regulator/wm831x-dcdc.c

@@ -267,23 +267,6 @@ static int wm831x_buckv_select_min_voltage(struct regulator_dev *rdev,
 	return vsel;
 }
 
-static int wm831x_buckv_select_max_voltage(struct regulator_dev *rdev,
-					   int min_uV, int max_uV)
-{
-	u16 vsel;
-
-	if (max_uV < 600000 || max_uV > 1800000)
-		return -EINVAL;
-
-	vsel = ((max_uV - 600000) / 12500) + 8;
-
-	if (wm831x_buckv_list_voltage(rdev, vsel) < min_uV ||
-	    wm831x_buckv_list_voltage(rdev, vsel) < max_uV)
-		return -EINVAL;
-
-	return vsel;
-}
-
 static int wm831x_buckv_set_dvs(struct regulator_dev *rdev, int state)
 {
 	struct wm831x_dcdc *dcdc = rdev_get_drvdata(rdev);
@@ -338,28 +321,23 @@ static int wm831x_buckv_set_voltage(struct regulator_dev *rdev,
 	if (ret < 0)
 		return ret;
 
-	/* Set the high voltage as the DVS voltage.  This is optimised
-	 * for CPUfreq usage, most processors will keep the maximum
-	 * voltage constant and lower the minimum with the frequency. */
-	vsel = wm831x_buckv_select_max_voltage(rdev, min_uV, max_uV);
-	if (vsel < 0) {
-		/* This should never happen - at worst the same vsel
-		 * should be chosen */
-		WARN_ON(vsel < 0);
-		return 0;
+	/*
+	 * If this VSEL is higher than the last one we've seen then
+	 * remember it as the DVS VSEL.  This is optimised for CPUfreq
+	 * usage where we want to get to the highest voltage very
+	 * quickly.
+	 */
+	if (vsel > dcdc->dvs_vsel) {
+		ret = wm831x_set_bits(wm831x, dvs_reg,
+				      WM831X_DC1_DVS_VSEL_MASK,
+				      dcdc->dvs_vsel);
+		if (ret == 0)
+			dcdc->dvs_vsel = vsel;
+		else
+			dev_warn(wm831x->dev,
+				 "Failed to set DCDC DVS VSEL: %d\n", ret);
 	}
 
-	/* Don't bother if it's the same VSEL we're already using */
-	if (vsel == dcdc->on_vsel)
-		return 0;
-
-	ret = wm831x_set_bits(wm831x, dvs_reg, WM831X_DC1_DVS_VSEL_MASK, vsel);
-	if (ret == 0)
-		dcdc->dvs_vsel = vsel;
-	else
-		dev_warn(wm831x->dev, "Failed to set DCDC DVS VSEL: %d\n",
-			 ret);
-
 	return 0;
 }
 
@@ -456,27 +434,6 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
 	if (!pdata || !pdata->dvs_gpio)
 		return;
 
-	switch (pdata->dvs_control_src) {
-	case 1:
-		ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
-		break;
-	case 2:
-		ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
-		break;
-	default:
-		dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
-			pdata->dvs_control_src, dcdc->name);
-		return;
-	}
-
-	ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
-			      WM831X_DC1_DVS_SRC_MASK, ctrl);
-	if (ret < 0) {
-		dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
-			dcdc->name, ret);
-		return;
-	}
-
 	ret = gpio_request(pdata->dvs_gpio, "DCDC DVS");
 	if (ret < 0) {
 		dev_err(wm831x->dev, "Failed to get %s DVS GPIO: %d\n",
@@ -498,17 +455,57 @@ static __devinit void wm831x_buckv_dvs_init(struct wm831x_dcdc *dcdc,
 	}
 
 	dcdc->dvs_gpio = pdata->dvs_gpio;
+
+	switch (pdata->dvs_control_src) {
+	case 1:
+		ctrl = 2 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	case 2:
+		ctrl = 3 << WM831X_DC1_DVS_SRC_SHIFT;
+		break;
+	default:
+		dev_err(wm831x->dev, "Invalid DVS control source %d for %s\n",
+			pdata->dvs_control_src, dcdc->name);
+		return;
+	}
+
+	/* If DVS_VSEL is set to the minimum value then raise it to ON_VSEL
+	 * to make bootstrapping a bit smoother.
+	 */
+	if (!dcdc->dvs_vsel) {
+		ret = wm831x_set_bits(wm831x,
+				      dcdc->base + WM831X_DCDC_DVS_CONTROL,
+				      WM831X_DC1_DVS_VSEL_MASK, dcdc->on_vsel);
+		if (ret == 0)
+			dcdc->dvs_vsel = dcdc->on_vsel;
+		else
+			dev_warn(wm831x->dev, "Failed to set DVS_VSEL: %d\n",
+				 ret);
+	}
+
+	ret = wm831x_set_bits(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL,
+			      WM831X_DC1_DVS_SRC_MASK, ctrl);
+	if (ret < 0) {
+		dev_err(wm831x->dev, "Failed to set %s DVS source: %d\n",
+			dcdc->name, ret);
+	}
 }
 
 static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->dcdc);
+	int id;
 	struct wm831x_dcdc *dcdc;
 	struct resource *res;
 	int ret, irq;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
 	dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1);
 
 	if (pdata == NULL || pdata->dcdc[id] == NULL)
@@ -545,7 +542,7 @@ static __devinit int wm831x_buckv_probe(struct platform_device *pdev)
 	}
 	dcdc->on_vsel = ret & WM831X_DC1_ON_VSEL_MASK;
 
-	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_ON_CONFIG);
+	ret = wm831x_reg_read(wm831x, dcdc->base + WM831X_DCDC_DVS_CONTROL);
 	if (ret < 0) {
 		dev_err(wm831x->dev, "Failed to read DVS VSEL: %d\n", ret);
 		goto err;
@@ -709,11 +706,17 @@ static __devinit int wm831x_buckp_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->dcdc);
+	int id;
 	struct wm831x_dcdc *dcdc;
 	struct resource *res;
 	int ret, irq;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
 	dev_dbg(&pdev->dev, "Probing DCDC%d\n", id + 1);
 
 	if (pdata == NULL || pdata->dcdc[id] == NULL)
@@ -1046,3 +1049,4 @@ MODULE_DESCRIPTION("WM831x DC-DC convertor driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:wm831x-buckv");
 MODULE_ALIAS("platform:wm831x-buckp");
+MODULE_ALIAS("platform:wm831x-epe");

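The id computation added to both wm831x probe functions maps a globally unique platform device id back to a per-chip regulator index, replacing the old modulo trick. Worked through with illustrative numbers: a first chip (wm831x_num == 0) keeps pdev->id 0..n as-is, while a second chip registered with wm831x_num == 1 numbers its children from base 1 * 10 + 1 = 11, so its pdev->id 12 resolves to local id 12 - 11 = 1, i.e. DCDC2/LDO2 on that chip:

	if (pdata && pdata->wm831x_num)
		id = (pdata->wm831x_num * 10) + 1;	/* chip's numbering base */
	else
		id = 0;
	id = pdev->id - id;				/* back to per-chip index */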
+ 22 - 3
drivers/regulator/wm831x-ldo.c

@@ -310,11 +310,17 @@ static __devinit int wm831x_gp_ldo_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	int id;
 	struct wm831x_ldo *ldo;
 	struct resource *res;
 	int ret, irq;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
 	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
 
 	if (pdata == NULL || pdata->ldo[id] == NULL)
@@ -574,11 +580,17 @@ static __devinit int wm831x_aldo_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	int id;
 	struct wm831x_ldo *ldo;
 	struct resource *res;
 	int ret, irq;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
 	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
 
 	if (pdata == NULL || pdata->ldo[id] == NULL)
@@ -764,11 +776,18 @@ static __devinit int wm831x_alive_ldo_probe(struct platform_device *pdev)
 {
 	struct wm831x *wm831x = dev_get_drvdata(pdev->dev.parent);
 	struct wm831x_pdata *pdata = wm831x->dev->platform_data;
-	int id = pdev->id % ARRAY_SIZE(pdata->ldo);
+	int id;
 	struct wm831x_ldo *ldo;
 	struct resource *res;
 	int ret;
 
+	if (pdata && pdata->wm831x_num)
+		id = (pdata->wm831x_num * 10) + 1;
+	else
+		id = 0;
+	id = pdev->id - id;
+
+
 	dev_dbg(&pdev->dev, "Probing LDO%d\n", id + 1);
 
 	if (pdata == NULL || pdata->ldo[id] == NULL)

+ 2 - 2
drivers/regulator/wm8994-regulator.c

@@ -43,7 +43,7 @@ static int wm8994_ldo_enable(struct regulator_dev *rdev)
 	if (!ldo->enable)
 		return 0;
 
-	gpio_set_value(ldo->enable, 1);
+	gpio_set_value_cansleep(ldo->enable, 1);
 	ldo->is_enabled = true;
 
 	return 0;
@@ -57,7 +57,7 @@ static int wm8994_ldo_disable(struct regulator_dev *rdev)
 	if (!ldo->enable)
 		return -EINVAL;
 
-	gpio_set_value(ldo->enable, 0);
+	gpio_set_value_cansleep(ldo->enable, 0);
 	ldo->is_enabled = false;
 
 	return 0;

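The wm8994 switch to gpio_set_value_cansleep() matters when the enable line sits behind a bus-attached GPIO expander (I2C/SPI), whose set operation may sleep; plain gpio_set_value() is only valid for GPIOs that can be driven from atomic context. Regulator enable/disable runs in process context, so the _cansleep variant is allowed here. Illustratively (the GPIO numbers are made up):

	gpio_set_value(42, 1);			/* SoC GPIO, atomic-safe */
	gpio_set_value_cansleep(200, 1);	/* expander GPIO, may sleep */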
+ 3 - 3
fs/9p/acl.c

@@ -182,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry,
 	return 0;
 }
 
-int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 		  struct posix_acl **dpacl, struct posix_acl **pacl)
 {
 	int retval = 0;
-	mode_t mode = *modep;
+	umode_t mode = *modep;
 	struct posix_acl *acl = NULL;
 
 	if (!S_ISLNK(mode)) {
@@ -319,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
+			umode_t mode = inode->i_mode;
 			retval = posix_acl_equiv_mode(acl, &mode);
 			if (retval < 0)
 				goto err_out;

+ 2 - 2
fs/9p/acl.h

@@ -20,7 +20,7 @@ extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
 extern int v9fs_acl_chmod(struct dentry *);
 extern int v9fs_set_create_acl(struct dentry *,
 			       struct posix_acl **, struct posix_acl **);
-extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 			 struct posix_acl **dpacl, struct posix_acl **pacl);
 #else
 #define v9fs_iop_get_acl NULL
@@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry,
 {
 	return 0;
 }
-static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
 				struct posix_acl **dpacl,
 				struct posix_acl **pacl)
 {

+ 3 - 3
fs/9p/vfs_inode_dotl.c

@@ -206,7 +206,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err = 0;
 	gid_t gid;
 	int flags;
-	mode_t mode;
+	umode_t mode;
 	char *name = NULL;
 	struct file *filp;
 	struct p9_qid qid;
@@ -348,7 +348,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct inode *inode;
 	struct p9_qid qid;
 	struct dentry *dir_dentry;
@@ -751,7 +751,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	int err;
 	gid_t gid;
 	char *name;
-	mode_t mode;
+	umode_t mode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	struct inode *inode;

+ 1 - 0
fs/block_dev.c

@@ -552,6 +552,7 @@ struct block_device *bdget(dev_t dev)
 
 	if (inode->i_state & I_NEW) {
 		bdev->bd_contains = NULL;
+		bdev->bd_super = NULL;
 		bdev->bd_inode = inode;
 		bdev->bd_block_size = (1 << inode->i_blkbits);
 		bdev->bd_part_count = 0;

+ 2 - 8
fs/btrfs/acl.c

@@ -111,7 +111,6 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 	int ret, size = 0;
 	const char *name;
 	char *value = NULL;
-	mode_t mode;
 
 	if (acl) {
 		ret = posix_acl_valid(acl);
@@ -122,13 +121,11 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans,
 
 	switch (type) {
 	case ACL_TYPE_ACCESS:
-		mode = inode->i_mode;
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			ret = posix_acl_equiv_mode(acl, &mode);
+			ret = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (ret < 0)
 				return ret;
-			inode->i_mode = mode;
 		}
 		ret = 0;
 		break;
@@ -222,19 +219,16 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
 	}
 
 	if (IS_POSIXACL(dir) && acl) {
-		mode_t mode = inode->i_mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			ret = btrfs_set_acl(trans, inode, acl,
 					    ACL_TYPE_DEFAULT);
 			if (ret)
 				goto failed;
 		}
-		ret = posix_acl_create(&acl, GFP_NOFS, &mode);
+		ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
 		if (ret < 0)
 			return ret;
 
-		inode->i_mode = mode;
 		if (ret > 0) {
 			/* we need an acl */
 			ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);

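This btrfs hunk, and the matching ext2/ext3/ext4 hunks further down, all drop the local mode copy and let posix_acl_equiv_mode()/posix_acl_create() update inode->i_mode in place, removing the copy-out/copy-back boilerplate. The before/after shape, schematically:

	/* before: copy out, call, copy back */
	umode_t mode = inode->i_mode;
	error = posix_acl_equiv_mode(acl, &mode);
	if (error < 0)
		return error;
	inode->i_mode = mode;

	/* after: operate on i_mode directly */
	error = posix_acl_equiv_mode(acl, &inode->i_mode);
	if (error < 0)
		return error;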
+ 45 - 2
fs/btrfs/inode.c

@@ -3993,12 +3993,19 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 	struct btrfs_root *sub_root = root;
 	struct btrfs_key location;
 	int index;
-	int ret;
+	int ret = 0;
 
 	if (dentry->d_name.len > BTRFS_NAME_LEN)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	ret = btrfs_inode_by_name(dir, dentry, &location);
+	if (unlikely(d_need_lookup(dentry))) {
+		memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
+		kfree(dentry->d_fsdata);
+		dentry->d_fsdata = NULL;
+		d_clear_need_lookup(dentry);
+	} else {
+		ret = btrfs_inode_by_name(dir, dentry, &location);
+	}
 
 	if (ret < 0)
 		return ERR_PTR(ret);
@@ -4053,6 +4060,12 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
 	return 0;
 }
 
+static void btrfs_dentry_release(struct dentry *dentry)
+{
+	if (dentry->d_fsdata)
+		kfree(dentry->d_fsdata);
+}
+
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 				   struct nameidata *nd)
 {
@@ -4075,6 +4088,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 	struct btrfs_path *path;
 	struct list_head ins_list;
 	struct list_head del_list;
+	struct qstr q;
 	int ret;
 	struct extent_buffer *leaf;
 	int slot;
@@ -4164,6 +4178,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
 		while (di_cur < di_total) {
 			struct btrfs_key location;
+			struct dentry *tmp;
 
 			if (verify_dir_item(root, leaf, di))
 				break;
@@ -4184,6 +4199,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 			d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
 			btrfs_dir_item_key_to_cpu(leaf, di, &location);
 
+			q.name = name_ptr;
+			q.len = name_len;
+			q.hash = full_name_hash(q.name, q.len);
+			tmp = d_lookup(filp->f_dentry, &q);
+			if (!tmp) {
+				struct btrfs_key *newkey;
+
+				newkey = kzalloc(sizeof(struct btrfs_key),
+						 GFP_NOFS);
+				if (!newkey)
+					goto no_dentry;
+				tmp = d_alloc(filp->f_dentry, &q);
+				if (!tmp) {
+					kfree(newkey);
+					dput(tmp);
+					goto no_dentry;
+				}
+				memcpy(newkey, &location,
+				       sizeof(struct btrfs_key));
+				tmp->d_fsdata = newkey;
+				tmp->d_flags |= DCACHE_NEED_LOOKUP;
+				d_rehash(tmp);
+				dput(tmp);
+			} else {
+				dput(tmp);
+			}
+no_dentry:
 			/* is this a reference to our own snapshot? If so
 			 * skip it
 			 */
@@ -7430,4 +7472,5 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
 
 const struct dentry_operations btrfs_dentry_operations = {
 	.d_delete	= btrfs_dentry_delete,
+	.d_release	= btrfs_dentry_release,
 };

+ 27 - 42
fs/dcache.c

@@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 	return parent;
 }
 
+/*
+ * Unhash a dentry without inserting an RCU walk barrier or checking that
+ * dentry->d_lock is locked.  The caller must take care of that, if
+ * appropriate.
+ */
+static void __d_shrink(struct dentry *dentry)
+{
+	if (!d_unhashed(dentry)) {
+		struct hlist_bl_head *b;
+		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+			b = &dentry->d_sb->s_anon;
+		else
+			b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+		hlist_bl_lock(b);
+		__hlist_bl_del(&dentry->d_hash);
+		dentry->d_hash.pprev = NULL;
+		hlist_bl_unlock(b);
+	}
+}
+
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 void __d_drop(struct dentry *dentry)
 {
 	if (!d_unhashed(dentry)) {
-		struct hlist_bl_head *b;
-		if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-			b = &dentry->d_sb->s_anon;
-		else
-			b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
-		hlist_bl_lock(b);
-		__hlist_bl_del(&dentry->d_hash);
-		dentry->d_hash.pprev = NULL;
-		hlist_bl_unlock(b);
-
+		__d_shrink(dentry);
 		dentry_rcuwalk_barrier(dentry);
 	}
 }
@@ -828,44 +839,24 @@ EXPORT_SYMBOL(shrink_dcache_sb);
 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 {
 	struct dentry *parent;
-	unsigned detached = 0;
 
 	BUG_ON(!IS_ROOT(dentry));
 
-	/* detach this root from the system */
-	spin_lock(&dentry->d_lock);
-	dentry_lru_del(dentry);
-	__d_drop(dentry);
-	spin_unlock(&dentry->d_lock);
-
 	for (;;) {
 		/* descend to the first leaf in the current subtree */
-		while (!list_empty(&dentry->d_subdirs)) {
-			struct dentry *loop;
-
-			/* this is a branch with children - detach all of them
-			 * from the system in one go */
-			spin_lock(&dentry->d_lock);
-			list_for_each_entry(loop, &dentry->d_subdirs,
-					    d_u.d_child) {
-				spin_lock_nested(&loop->d_lock,
-						DENTRY_D_LOCK_NESTED);
-				dentry_lru_del(loop);
-				__d_drop(loop);
-				spin_unlock(&loop->d_lock);
-			}
-			spin_unlock(&dentry->d_lock);
-
-			/* move to the first child */
+		while (!list_empty(&dentry->d_subdirs))
 			dentry = list_entry(dentry->d_subdirs.next,
 					    struct dentry, d_u.d_child);
-		}
 
 		/* consume the dentries from this leaf up through its parents
 		 * until we find one with children or run out altogether */
 		do {
 			struct inode *inode;
 
+			/* detach from the system */
+			dentry_lru_del(dentry);
+			__d_shrink(dentry);
+
 			if (dentry->d_count != 0) {
 				printk(KERN_ERR
 				       "BUG: Dentry %p{i=%lx,n=%s}"
@@ -886,14 +877,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 				list_del(&dentry->d_u.d_child);
 			} else {
 				parent = dentry->d_parent;
-				spin_lock(&parent->d_lock);
 				parent->d_count--;
 				list_del(&dentry->d_u.d_child);
-				spin_unlock(&parent->d_lock);
 			}
 
-			detached++;
-
 			inode = dentry->d_inode;
 			if (inode) {
 				dentry->d_inode = NULL;
@@ -938,9 +925,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
 
 	dentry = sb->s_root;
 	sb->s_root = NULL;
-	spin_lock(&dentry->d_lock);
 	dentry->d_count--;
-	spin_unlock(&dentry->d_lock);
 	shrink_dcache_for_umount_subtree(dentry);
 
 	while (!hlist_bl_empty(&sb->s_anon)) {

+ 2 - 6
fs/ext2/acl.c

@@ -194,12 +194,10 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 		case ACL_TYPE_ACCESS:
 			name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
 			if (acl) {
-				mode_t mode = inode->i_mode;
-				error = posix_acl_equiv_mode(acl, &mode);
+				error = posix_acl_equiv_mode(acl, &inode->i_mode);
 				if (error < 0)
 					return error;
 				else {
-					inode->i_mode = mode;
 					inode->i_ctime = CURRENT_TIME_SEC;
 					mark_inode_dirty(inode);
 					if (error == 0)
@@ -253,16 +251,14 @@ ext2_init_acl(struct inode *inode, struct inode *dir)
 			inode->i_mode &= ~current_umask();
 	}
 	if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
-		mode_t mode = inode->i_mode;
 		if (S_ISDIR(inode->i_mode)) {
 			error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl);
 			if (error)
 				goto cleanup;
 		}
-		error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+		error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
 		if (error < 0)
 			return error;
-		inode->i_mode = mode;
 		if (error > 0) {
 			/* This is an extended ACL */
 			error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl);

+ 2 - 7
fs/ext3/acl.c

@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 		case ACL_TYPE_ACCESS:
 			name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
 			if (acl) {
-				mode_t mode = inode->i_mode;
-				error = posix_acl_equiv_mode(acl, &mode);
+				error = posix_acl_equiv_mode(acl, &inode->i_mode);
 				if (error < 0)
 					return error;
 				else {
-					inode->i_mode = mode;
 					inode->i_ctime = CURRENT_TIME_SEC;
 					ext3_mark_inode_dirty(handle, inode);
 					if (error == 0)
@@ -261,19 +259,16 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 			inode->i_mode &= ~current_umask();
 	}
 	if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
-		mode_t mode = inode->i_mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			error = ext3_set_acl(handle, inode,
 					     ACL_TYPE_DEFAULT, acl);
 			if (error)
 				goto cleanup;
 		}
-		error = posix_acl_create(&acl, GFP_NOFS, &mode);
+		error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
 		if (error < 0)
 			return error;
 
-		inode->i_mode = mode;
 		if (error > 0) {
 			/* This is an extended ACL */
 			error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);

+ 1 - 1
fs/ext4/Makefile

@@ -7,7 +7,7 @@ obj-$(CONFIG_EXT4_FS) += ext4.o
 ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o \
-		mmp.o
+		mmp.o indirect.o
 
 ext4-$(CONFIG_EXT4_FS_XATTR)		+= xattr.o xattr_user.o xattr_trusted.o
 ext4-$(CONFIG_EXT4_FS_POSIX_ACL)	+= acl.o

+ 2 - 7
fs/ext4/acl.c

@@ -198,12 +198,10 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 	case ACL_TYPE_ACCESS:
 		name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
-			error = posix_acl_equiv_mode(acl, &mode);
+			error = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (error < 0)
 				return error;
 			else {
-				inode->i_mode = mode;
 				inode->i_ctime = ext4_current_time(inode);
 				ext4_mark_inode_dirty(handle, inode);
 				if (error == 0)
@@ -259,19 +257,16 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
 			inode->i_mode &= ~current_umask();
 	}
 	if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
-		mode_t mode = inode->i_mode;
-
 		if (S_ISDIR(inode->i_mode)) {
 			error = ext4_set_acl(handle, inode,
 					     ACL_TYPE_DEFAULT, acl);
 			if (error)
 				goto cleanup;
 		}
-		error = posix_acl_create(&acl, GFP_NOFS, &mode);
+		error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
 		if (error < 0)
 			return error;
 
-		inode->i_mode = mode;
 		if (error > 0) {
 			/* This is an extended ACL */
 			error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);

+ 48 - 0
fs/ext4/balloc.c

@@ -620,3 +620,51 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
 
 }
 
+/**
+ *	ext4_inode_to_goal_block - return a hint for block allocation
+ *	@inode: inode for block allocation
+ *
+ *	Return the ideal location to start allocating blocks for a
+ *	newly created inode.
+ */
+ext4_fsblk_t ext4_inode_to_goal_block(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	ext4_group_t block_group;
+	ext4_grpblk_t colour;
+	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
+	ext4_fsblk_t bg_start;
+	ext4_fsblk_t last_block;
+
+	block_group = ei->i_block_group;
+	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
+		/*
+		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
+		 * block groups per flexgroup, reserve the first block
+		 * group for directories and special files.  Regular
+		 * files will start at the second block group.  This
+		 * tends to speed up directory access and improves
+		 * fsck times.
+		 */
+		block_group &= ~(flex_size-1);
+		if (S_ISREG(inode->i_mode))
+			block_group++;
+	}
+	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
+	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
+
+	/*
+	 * If we are doing delayed allocation, we don't need take
+	 * colour into account.
+	 */
+	if (test_opt(inode->i_sb, DELALLOC))
+		return bg_start;
+
+	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
+		colour = (current->pid % 16) *
+			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
+	else
+		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
+	return bg_start + colour;
+}
+

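The pid-based colour at the end of ext4_inode_to_goal_block() spreads concurrent allocators across a block group so they do not all contend for the same region. With the common 32768 blocks per group and, say, pid 4242 (an arbitrary example), the start hint lands (4242 % 16) * (32768 / 16) = 2 * 2048 = 4096 blocks past the group start; delayed-allocation mounts skip the colour entirely and return bg_start.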
+ 21 - 0
fs/ext4/block_validity.c

@@ -246,3 +246,24 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
 	return 1;
 }
 
+int ext4_check_blockref(const char *function, unsigned int line,
+			struct inode *inode, __le32 *p, unsigned int max)
+{
+	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+	__le32 *bref = p;
+	unsigned int blk;
+
+	while (bref < p+max) {
+		blk = le32_to_cpu(*bref++);
+		if (blk &&
+		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
+						    blk, 1))) {
+			es->s_last_error_block = cpu_to_le64(blk);
+			ext4_error_inode(inode, function, line, blk,
+					 "invalid block");
+			return -EIO;
+		}
+	}
+	return 0;
+}
+

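Callers are not expected to invoke ext4_check_blockref() directly; the ext4.h hunk below wraps it as ext4_check_indirect_blockref() and ext4_ind_check_inode() so __func__ and __LINE__ are filled in automatically. A hypothetical call site on the indirect-block read path might look like:

	if (ext4_check_indirect_blockref(inode, bh)) {
		brelse(bh);
		return -EIO;	/* corrupt block-pointer table */
	}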
+ 52 - 3
fs/ext4/ext4.h

@@ -526,6 +526,7 @@ struct ext4_new_group_data {
 #define EXT4_FREE_BLOCKS_METADATA	0x0001
 #define EXT4_FREE_BLOCKS_FORGET		0x0002
 #define EXT4_FREE_BLOCKS_VALIDATED	0x0004
+#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE	0x0008
 
 /*
  * ioctl commands
@@ -939,6 +940,8 @@ struct ext4_inode_info {
 #define ext4_find_next_zero_bit		find_next_zero_bit_le
 #define ext4_find_next_bit		find_next_bit_le
 
+extern void ext4_set_bits(void *bm, int cur, int len);
+
 /*
  * Maximal mount counts between two filesystem checks
  */
@@ -1126,7 +1129,8 @@ struct ext4_sb_info {
 	struct journal_s *s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
-	struct mutex s_resize_lock;
+	unsigned long s_resize_flags;		/* Flags indicating if there
+						   is a resizer */
 	unsigned long s_commit_interval;
 	u32 s_max_batch_time;
 	u32 s_min_batch_time;
@@ -1214,6 +1218,9 @@ struct ext4_sb_info {
 
 	/* Kernel thread for multiple mount protection */
 	struct task_struct *s_mmp_tsk;
+
+	/* record the last minlen when FITRIM is called. */
+	atomic_t s_last_trim_minblks;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1743,6 +1750,7 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
 				       struct ext4_group_desc *desc);
 #define ext4_free_blocks_after_init(sb, group, desc)			\
 		ext4_init_block_bitmap(sb, NULL, group, desc)
+ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
 extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
@@ -1793,7 +1801,7 @@ extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
-extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
 extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
 
@@ -1834,6 +1842,17 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
+
+/* indirect.c */
+extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+				struct ext4_map_blocks *map, int flags);
+extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
+				const struct iovec *iov, loff_t offset,
+				unsigned long nr_segs);
+extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock);
+extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk);
+extern void ext4_ind_truncate(struct inode *inode);
+
 /* ioctl.c */
 extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
 extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
@@ -1855,6 +1874,9 @@ extern int ext4_group_extend(struct super_block *sb,
 				ext4_fsblk_t n_blocks_count);
 
 /* super.c */
+extern void *ext4_kvmalloc(size_t size, gfp_t flags);
+extern void *ext4_kvzalloc(size_t size, gfp_t flags);
+extern void ext4_kvfree(void *ptr);
 extern void __ext4_error(struct super_block *, const char *, unsigned int,
 			 const char *, ...)
 	__attribute__ ((format (printf, 4, 5)));
@@ -2067,11 +2089,19 @@ struct ext4_group_info {
 					 * 5 free 8-block regions. */
 };
 
-#define EXT4_GROUP_INFO_NEED_INIT_BIT	0
+#define EXT4_GROUP_INFO_NEED_INIT_BIT		0
+#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT		1
 
 #define EXT4_MB_GRP_NEED_INIT(grp)	\
 	(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
 
+#define EXT4_MB_GRP_WAS_TRIMMED(grp)	\
+	(test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_SET_TRIMMED(grp)	\
+	(set_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+#define EXT4_MB_GRP_CLEAR_TRIMMED(grp)	\
+	(clear_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
+
 #define EXT4_MAX_CONTENTION		8
 #define EXT4_CONTENTION_THRESHOLD	2
 
@@ -2122,6 +2152,19 @@ static inline void ext4_mark_super_dirty(struct super_block *sb)
 		sb->s_dirt =1;
 }
 
+/*
+ * Block validity checking
+ */
+#define ext4_check_indirect_blockref(inode, bh)				\
+	ext4_check_blockref(__func__, __LINE__, inode,			\
+			    (__le32 *)(bh)->b_data,			\
+			    EXT4_ADDR_PER_BLOCK((inode)->i_sb))
+
+#define ext4_ind_check_inode(inode)					\
+	ext4_check_blockref(__func__, __LINE__, inode,			\
+			    EXT4_I(inode)->i_data,			\
+			    EXT4_NDIR_BLOCKS)
+
 /*
  * Inodes and files operations
  */
@@ -2151,6 +2194,8 @@ extern void ext4_exit_system_zone(void);
 extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
 				 ext4_fsblk_t start_blk,
 				 unsigned int count);
+extern int ext4_check_blockref(const char *, unsigned int,
+			       struct inode *, __le32 *, unsigned int);
 
 /* extents.c */
 extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
@@ -2230,6 +2275,10 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
 extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
 
+#define EXT4_RESIZING	0
+extern int ext4_resize_begin(struct super_block *sb);
+extern void ext4_resize_end(struct super_block *sb);
+
 #endif	/* __KERNEL__ */
 
 #endif	/* _EXT4_H */

+ 59 - 70
fs/ext4/extents.c

@@ -114,12 +114,6 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 			      struct ext4_ext_path *path,
 			      ext4_lblk_t block)
 {
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	ext4_fsblk_t bg_start;
-	ext4_fsblk_t last_block;
-	ext4_grpblk_t colour;
-	ext4_group_t block_group;
-	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
 	int depth;
 
 	if (path) {
@@ -161,36 +155,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 	}
 
 	/* OK. use inode's group */
-	block_group = ei->i_block_group;
-	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
-		/*
-		 * If there are at least EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME
-		 * block groups per flexgroup, reserve the first block
-		 * group for directories and special files.  Regular
-		 * files will start at the second block group.  This
-		 * tends to speed up directory access and improves
-		 * fsck times.
-		 */
-		block_group &= ~(flex_size-1);
-		if (S_ISREG(inode->i_mode))
-			block_group++;
-	}
-	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
-	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
-
-	/*
-	 * If we are doing delayed allocation, we don't need take
-	 * colour into account.
-	 */
-	if (test_opt(inode->i_sb, DELALLOC))
-		return bg_start;
-
-	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
-		colour = (current->pid % 16) *
-			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
-	else
-		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
-	return bg_start + colour + block;
+	return ext4_inode_to_goal_block(inode);
 }
 
 /*
@@ -776,6 +741,16 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 				 logical, le32_to_cpu(curp->p_idx->ei_block));
 		return -EIO;
 	}
+
+	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
+			     >= le16_to_cpu(curp->p_hdr->eh_max))) {
+		EXT4_ERROR_INODE(inode,
+				 "eh_entries %d >= eh_max %d!",
+				 le16_to_cpu(curp->p_hdr->eh_entries),
+				 le16_to_cpu(curp->p_hdr->eh_max));
+		return -EIO;
+	}
+
 	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
 	if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
 		/* insert after */
@@ -805,13 +780,6 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
 	ext4_idx_store_pblock(ix, ptr);
 	le16_add_cpu(&curp->p_hdr->eh_entries, 1);
 
-	if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
-			     > le16_to_cpu(curp->p_hdr->eh_max))) {
-		EXT4_ERROR_INODE(inode,
-				 "logical %d == ei_block %d!",
-				 logical, le32_to_cpu(curp->p_idx->ei_block));
-		return -EIO;
-	}
 	if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
 		EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
 		return -EIO;
@@ -1446,8 +1414,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
  * ext4_ext_next_leaf_block:
  * returns first allocated block from next leaf or EXT_MAX_BLOCKS
  */
-static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
-					struct ext4_ext_path *path)
+static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
 {
 	int depth;
 
@@ -1757,7 +1724,6 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 		goto merge;
 	}
 
-repeat:
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
 	if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
@@ -1765,9 +1731,10 @@ repeat:
 
 	/* probably next leaf has space for us? */
 	fex = EXT_LAST_EXTENT(eh);
-	next = ext4_ext_next_leaf_block(inode, path);
-	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
-	    && next != EXT_MAX_BLOCKS) {
+	next = EXT_MAX_BLOCKS;
+	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
+		next = ext4_ext_next_leaf_block(path);
+	if (next != EXT_MAX_BLOCKS) {
 		ext_debug("next leaf block - %d\n", next);
 		BUG_ON(npath != NULL);
 		npath = ext4_ext_find_extent(inode, next, NULL);
@@ -1779,7 +1746,7 @@ repeat:
 			ext_debug("next leaf isn't full(%d)\n",
 				  le16_to_cpu(eh->eh_entries));
 			path = npath;
-			goto repeat;
+			goto has_space;
 		}
 		ext_debug("next leaf has no free space(%d,%d)\n",
 			  le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
@@ -1839,7 +1806,7 @@ has_space:
 				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_len(newext),
-				nearex, len, nearex + 1, nearex + 2);
+				nearex, len, nearex, nearex + 1);
 		memmove(nearex + 1, nearex, len);
 		path[depth].p_ext = nearex;
 	}
@@ -2052,7 +2019,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_in_cache()
+ * ext4_ext_check_cache()
  * Checks to see if the given block is in the cache.
  * If it is, the cached extent is stored in the given
  * cache extent pointer.  If the cached extent is a hole,
@@ -2134,8 +2101,6 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
 /*
  * ext4_ext_rm_idx:
  * removes index from the index block.
- * It's used in truncate case only, thus all requests are for
- * last index in the block only.
  */
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 			struct ext4_ext_path *path)
@@ -2153,6 +2118,13 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	err = ext4_ext_get_access(handle, inode, path);
 	if (err)
 		return err;
+
+	if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
+		int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
+		len *= sizeof(struct ext4_extent_idx);
+		memmove(path->p_idx, path->p_idx + 1, len);
+	}
+
 	le16_add_cpu(&path->p_hdr->eh_entries, -1);
 	err = ext4_ext_dirty(handle, inode, path);
 	if (err)
@@ -2534,8 +2506,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
 	return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
-				ext4_lblk_t end)
+static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
 {
 	struct super_block *sb = inode->i_sb;
 	int depth = ext_depth(inode);
@@ -2575,7 +2546,7 @@ again:
 		if (i == depth) {
 			/* this is leaf block */
 			err = ext4_ext_rm_leaf(handle, inode, path,
-					start, end);
+					start, EXT_MAX_BLOCKS - 1);
 			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
 			path[i].p_bh = NULL;
@@ -3107,12 +3078,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 					      struct ext4_ext_path *path)
 {
 	struct ext4_extent *ex;
-	struct ext4_extent_header *eh;
 	int depth;
 	int err = 0;
 
 	depth = ext_depth(inode);
-	eh = path[depth].p_hdr;
 	ex = path[depth].p_ext;
 
 	ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
@@ -3357,8 +3326,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* check in cache */
-	if (ext4_ext_in_cache(inode, map->m_lblk, &newex) &&
-		((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) {
+	if (!(flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) &&
+		ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
 				/*
@@ -3497,8 +3466,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 			ext4_ext_mark_uninitialized(ex);
 
-			err = ext4_ext_remove_space(inode, map->m_lblk,
-				map->m_lblk + punched_out);
+			ext4_ext_invalidate_cache(inode);
+
+			err = ext4_ext_rm_leaf(handle, inode, path,
+				map->m_lblk, map->m_lblk + punched_out);
+
+			if (!err && path->p_hdr->eh_entries == 0) {
+				/*
+				 * Punch hole freed all of this sub tree,
+				 * so we need to correct eh_depth
+				 */
+				err = ext4_ext_get_access(handle, inode, path);
+				if (err == 0) {
+					ext_inode_hdr(inode)->eh_depth = 0;
+					ext_inode_hdr(inode)->eh_max =
+					cpu_to_le16(ext4_ext_space_root(
+						inode, 0));
+
+					err = ext4_ext_dirty(
+						handle, inode, path);
+				}
+			}
 
 			goto out2;
 		}
@@ -3596,17 +3584,18 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, ar.len);
-	if (err)
-		goto out2;
-
-	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+	if (!err)
+		err = ext4_ext_insert_extent(handle, inode, path,
+					     &newex, flags);
 	if (err) {
+		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
+			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
 		/* free data blocks we just allocated */
 		/* not a good idea to call discard here directly,
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
 		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
-				 ext4_ext_get_actual_len(&newex), 0);
+				 ext4_ext_get_actual_len(&newex), fb_flags);
 		goto out2;
 	}
 
@@ -3699,7 +3688,7 @@ void ext4_ext_truncate(struct inode *inode)
 
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
-	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+	err = ext4_ext_remove_space(inode, last_block);
 
 	/* In a multi-transaction truncate, we only make the final
 	 * transaction synchronous.
@@ -3835,7 +3824,7 @@ retry:
 						blkbits) >> blkbits))
 			new_size = offset + len;
 		else
-			new_size = (map.m_lblk + ret) << blkbits;
+			new_size = ((loff_t) map.m_lblk + ret) << blkbits;
 
 		ext4_falloc_update_inode(inode, mode, new_size,
 					 (map.m_flags & EXT4_MAP_NEW));

+ 21 - 5
fs/ext4/fsync.c

@@ -129,15 +129,30 @@ static int ext4_sync_parent(struct inode *inode)
 {
 	struct writeback_control wbc;
 	struct dentry *dentry = NULL;
+	struct inode *next;
 	int ret = 0;
 
-	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
+	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
+		return 0;
+	inode = igrab(inode);
+	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
-		dentry = list_entry(inode->i_dentry.next,
-				    struct dentry, d_alias);
-		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
+		dentry = NULL;
+		spin_lock(&inode->i_lock);
+		if (!list_empty(&inode->i_dentry)) {
+			dentry = list_first_entry(&inode->i_dentry,
+						  struct dentry, d_alias);
+			dget(dentry);
+		}
+		spin_unlock(&inode->i_lock);
+		if (!dentry)
 			break;
-		inode = dentry->d_parent->d_inode;
+		next = igrab(dentry->d_parent->d_inode);
+		dput(dentry);
+		if (!next)
+			break;
+		iput(inode);
+		inode = next;
 		ret = sync_mapping_buffers(inode->i_mapping);
 		if (ret)
 			break;
@@ -148,6 +163,7 @@ static int ext4_sync_parent(struct inode *inode)
 		if (ret)
 			break;
 	}
+	iput(inode);
 	return ret;
 }
 
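
The rewritten loop pins each inode (igrab) and each dentry (dget) before dropping the previous reference, so nothing in the chain can be freed under the walk. The general shape of that pattern, sketched with a hypothetical refcounted node type — hold()/release() stand in for igrab()/iput(), and this is not kernel code:

    #include <stdio.h>

    struct node {
    	int refs;
    	struct node *parent;
    	const char *name;
    };

    static struct node *hold(struct node *n)
    {
    	if (n)
    		n->refs++;
    	return n;
    }

    static void release(struct node *n)
    {
    	if (n)
    		n->refs--;	/* a real implementation frees at zero */
    }

    int main(void)
    {
    	struct node root = { 1, NULL, "root" };
    	struct node dir  = { 1, &root, "dir" };
    	struct node file = { 1, &dir, "file" };
    	struct node *cur = hold(&file), *next;

    	while (cur) {
    		printf("visiting %s (refs %d)\n", cur->name, cur->refs);
    		next = hold(cur->parent);	/* pin the parent first... */
    		release(cur);			/* ...only then drop the child */
    		cur = next;
    	}
    	return 0;
    }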

+ 1 - 1
fs/ext4/ialloc.c

@@ -1287,7 +1287,7 @@ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
 			   group, used_blks,
 			   ext4_itable_unused_count(sb, gdp));
 		ret = 1;
-		goto out;
+		goto err_out;
 	}
 
 	blk = ext4_inode_table(sb, gdp) + used_blks;

+ 1482 - 0
fs/ext4/indirect.c

@@ -0,0 +1,1482 @@
+/*
+ *  linux/fs/ext4/indirect.c
+ *
+ *  from
+ *
+ *  linux/fs/ext4/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/inode.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Goal-directed block allocation by Stephen Tweedie
+ *	(sct@redhat.com), 1993, 1998
+ */
+
+#include <linux/module.h>
+#include "ext4_jbd2.h"
+#include "truncate.h"
+
+#include <trace/events/ext4.h>
+
+typedef struct {
+	__le32	*p;
+	__le32	key;
+	struct buffer_head *bh;
+} Indirect;
+
+static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
+{
+	p->key = *(p->p = v);
+	p->bh = bh;
+}
+
+/**
+ *	ext4_block_to_path - parse the block number into array of offsets
+ *	@inode: inode in question (we are only interested in its superblock)
+ *	@i_block: block number to be parsed
+ *	@offsets: array to store the offsets in
+ *	@boundary: set this non-zero if the referred-to block is likely to be
+ *	       followed (on disk) by an indirect block.
+ *
+ *	To store the locations of file's data ext4 uses a data structure common
+ *	for UNIX filesystems - tree of pointers anchored in the inode, with
+ *	data blocks at leaves and indirect blocks in intermediate nodes.
+ *	This function translates the block number into path in that tree -
+ *	return value is the path length and @offsets[n] is the offset of
+ *	pointer to (n+1)th node in the nth one. If @block is out of range
+ *	(negative or too large) warning is printed and zero returned.
+ *
+ *	Note: function doesn't find node addresses, so no IO is needed. All
+ *	we need to know is the capacity of indirect blocks (taken from the
+ *	inode->i_sb).
+ */
+
+/*
+ * Portability note: the last comparison (check that we fit into triple
+ * indirect block) is spelled differently, because otherwise on an
+ * architecture with 32-bit longs and 8Kb pages we might get into trouble
+ * if our filesystem had 8Kb blocks. We might use long long, but that would
+ * kill us on x86. Oh, well, at least the sign propagation does not matter -
+ * i_block would have to be negative in the very beginning, so we would not
+ * get there at all.
+ */
+
+static int ext4_block_to_path(struct inode *inode,
+			      ext4_lblk_t i_block,
+			      ext4_lblk_t offsets[4], int *boundary)
+{
+	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
+	const long direct_blocks = EXT4_NDIR_BLOCKS,
+		indirect_blocks = ptrs,
+		double_blocks = (1 << (ptrs_bits * 2));
+	int n = 0;
+	int final = 0;
+
+	if (i_block < direct_blocks) {
+		offsets[n++] = i_block;
+		final = direct_blocks;
+	} else if ((i_block -= direct_blocks) < indirect_blocks) {
+		offsets[n++] = EXT4_IND_BLOCK;
+		offsets[n++] = i_block;
+		final = ptrs;
+	} else if ((i_block -= indirect_blocks) < double_blocks) {
+		offsets[n++] = EXT4_DIND_BLOCK;
+		offsets[n++] = i_block >> ptrs_bits;
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
+		offsets[n++] = EXT4_TIND_BLOCK;
+		offsets[n++] = i_block >> (ptrs_bits * 2);
+		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
+		offsets[n++] = i_block & (ptrs - 1);
+		final = ptrs;
+	} else {
+		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
+			     i_block + direct_blocks +
+			     indirect_blocks + double_blocks, inode->i_ino);
+	}
+	if (boundary)
+		*boundary = final - 1 - (i_block & (ptrs - 1));
+	return n;
+}
+
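
As a concrete illustration of the mapping: with 4 KiB blocks there are 1024 pointers per indirect block, so logical block 11 is the last direct block, block 12 is the first single-indirect block, and block 12 + 1024 = 1036 is the first double-indirect block. A minimal user-space sketch of the same computation (constants and names are illustrative, range checking omitted):

    #include <stdio.h>

    #define NDIR	12		/* direct pointers in the inode */
    #define PTRS	1024		/* pointers per indirect block at 4 KiB */
    #define PBITS	10		/* log2(PTRS) */

    static int block_to_path(long blk, long off[4])
    {
    	int n = 0;

    	if (blk < NDIR) {
    		off[n++] = blk;
    	} else if ((blk -= NDIR) < PTRS) {
    		off[n++] = NDIR;		/* the IND slot */
    		off[n++] = blk;
    	} else if ((blk -= PTRS) < (1L << (PBITS * 2))) {
    		off[n++] = NDIR + 1;		/* the DIND slot */
    		off[n++] = blk >> PBITS;
    		off[n++] = blk & (PTRS - 1);
    	} else {
    		blk -= 1L << (PBITS * 2);
    		off[n++] = NDIR + 2;		/* the TIND slot */
    		off[n++] = blk >> (PBITS * 2);
    		off[n++] = (blk >> PBITS) & (PTRS - 1);
    		off[n++] = blk & (PTRS - 1);
    	}
    	return n;			/* path length, as in the kernel */
    }

    int main(void)
    {
    	long off[4];
    	int depth = block_to_path(1036, off);	/* first double-indirect block */

    	printf("depth %d: %ld %ld %ld\n", depth, off[0], off[1], off[2]);
    	return 0;	/* prints: depth 3: 13 0 0 */
    }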
+/**
+ *	ext4_get_branch - read the chain of indirect blocks leading to data
+ *	@inode: inode in question
+ *	@depth: depth of the chain (1 - direct pointer, etc.)
+ *	@offsets: offsets of pointers in inode/indirect blocks
+ *	@chain: place to store the result
+ *	@err: here we store the error value
+ *
+ *	Function fills the array of triples <key, p, bh> and returns %NULL
+ *	if everything went OK or the pointer to the last filled triple
+ *	(incomplete one) otherwise. Upon the return chain[i].key contains
+ *	the number of (i+1)-th block in the chain (as it is stored in memory,
+ *	i.e. little-endian 32-bit), chain[i].p contains the address of that
+ *	number (it points into struct inode for i==0 and into the bh->b_data
+ *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
+ *	block for i>0 and NULL for i==0. In other words, it holds the block
+ *	numbers of the chain, addresses they were taken from (and where we can
+ *	verify that chain did not change) and buffer_heads hosting these
+ *	numbers.
+ *
+ *	Function stops when it stumbles upon zero pointer (absent block)
+ *		(pointer to last triple returned, *@err == 0)
+ *	or when it gets an IO error reading an indirect block
+ *		(ditto, *@err == -EIO)
+ *	or when it reads all @depth-1 indirect blocks successfully and finds
+ *	the whole chain, all way to the data (returns %NULL, *err == 0).
+ *
+ *      Needs to be called with
+ *      down_read(&EXT4_I(inode)->i_data_sem)
+ */
+static Indirect *ext4_get_branch(struct inode *inode, int depth,
+				 ext4_lblk_t  *offsets,
+				 Indirect chain[4], int *err)
+{
+	struct super_block *sb = inode->i_sb;
+	Indirect *p = chain;
+	struct buffer_head *bh;
+
+	*err = 0;
+	/* i_data is not going away, no lock needed */
+	add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
+	if (!p->key)
+		goto no_block;
+	while (--depth) {
+		bh = sb_getblk(sb, le32_to_cpu(p->key));
+		if (unlikely(!bh))
+			goto failure;
+
+		if (!bh_uptodate_or_lock(bh)) {
+			if (bh_submit_read(bh) < 0) {
+				put_bh(bh);
+				goto failure;
+			}
+			/* validate block references */
+			if (ext4_check_indirect_blockref(inode, bh)) {
+				put_bh(bh);
+				goto failure;
+			}
+		}
+
+		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
+		/* Reader: end */
+		if (!p->key)
+			goto no_block;
+	}
+	return NULL;
+
+failure:
+	*err = -EIO;
+no_block:
+	return p;
+}
+
+/**
+ *	ext4_find_near - find a place for allocation with sufficient locality
+ *	@inode: owner
+ *	@ind: descriptor of indirect block.
+ *
+ *	This function returns the preferred place for block allocation.
+ *	It is used when heuristic for sequential allocation fails.
+ *	Rules are:
+ *	  + if there is a block to the left of our position - allocate near it.
+ *	  + if pointer will live in indirect block - allocate near that block.
+ *	  + if pointer will live in inode - allocate in the same
+ *	    cylinder group.
+ *
+ * In the latter case we colour the starting block by the caller's PID to
+ * prevent it from clashing with concurrent allocations for a different inode
+ * in the same block group.   The PID is used here so that functionally related
+ * files will be close-by on-disk.
+ *
+ *	Caller must make sure that @ind is valid and will stay that way.
+ */
+static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	__le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
+	__le32 *p;
+
+	/* Try to find previous block */
+	for (p = ind->p - 1; p >= start; p--) {
+		if (*p)
+			return le32_to_cpu(*p);
+	}
+
+	/* No such thing, so let's try location of indirect block */
+	if (ind->bh)
+		return ind->bh->b_blocknr;
+
+	/*
+	 * It is going to be referred to from the inode itself? OK, just put it
+	 * into the same cylinder group then.
+	 */
+	return ext4_inode_to_goal_block(inode);
+}
+
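
The backward scan in ext4_find_near() is the heart of the locality heuristic: the nearest already-allocated block to the left of the slot being filled is the best allocation hint. In miniature (user-space sketch, illustrative names):

    #include <stdio.h>
    #include <stdint.h>

    /* walk left from the slot being filled, return the first non-zero entry */
    static uint32_t find_near(const uint32_t *start, const uint32_t *slot)
    {
    	const uint32_t *p;

    	for (p = slot - 1; p >= start; p--)
    		if (*p)
    			return *p;
    	return 0;	/* caller falls back to the indirect block itself */
    }

    int main(void)
    {
    	uint32_t map[6] = { 1000, 1001, 0, 0, 0, 0 };

    	printf("%u\n", find_near(map, &map[4]));	/* prints 1001 */
    	return 0;
    }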
+/**
+ *	ext4_find_goal - find a preferred place for allocation.
+ *	@inode: owner
+ *	@block:  block we want
+ *	@partial: pointer to the last triple within a chain
+ *
+ *	Normally this function finds the preferred place for block allocation
+ *	and returns it.
+ *	Because this is only used for non-extent files, we limit the block nr
+ *	to 32 bits.
+ */
+static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
+				   Indirect *partial)
+{
+	ext4_fsblk_t goal;
+
+	/*
+	 * XXX need to get goal block from mballoc's data structures
+	 */
+
+	goal = ext4_find_near(inode, partial);
+	goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
+	return goal;
+}
+
+/**
+ *	ext4_blks_to_allocate - Look up the block map and count the number
+ *	of direct blocks that need to be allocated for the given branch.
+ *
+ *	@branch: chain of indirect blocks
+ *	@k: number of blocks needed for indirect blocks
+ *	@blks: number of data blocks to be mapped.
+ *	@blocks_to_boundary:  the offset in the indirect block
+ *
+ *	return the total number of blocks to be allocated, including the
+ *	direct and indirect blocks.
+ */
+static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
+				 int blocks_to_boundary)
+{
+	unsigned int count = 0;
+
+	/*
+	 * Simple case: the [t,d]indirect block(s) have not been allocated yet,
+	 * so clearly the blocks on that path have not been allocated either
+	 */
+	if (k > 0) {
+		/* right now we don't handle cross boundary allocation */
+		if (blks < blocks_to_boundary + 1)
+			count += blks;
+		else
+			count += blocks_to_boundary + 1;
+		return count;
+	}
+
+	count++;
+	while (count < blks && count <= blocks_to_boundary &&
+		le32_to_cpu(*(branch[0].p + count)) == 0) {
+		count++;
+	}
+	return count;
+}
+
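
The clamp to blocks_to_boundary + 1 is what keeps a single allocation from crossing an indirect-block boundary: the current slot plus the slots remaining before the boundary are the most that can be counted. Restated as a user-space one-liner (illustrative, not kernel code):

    #include <stdio.h>

    static unsigned clamp_to_boundary(unsigned blks, unsigned blocks_to_boundary)
    {
    	return blks < blocks_to_boundary + 1 ? blks : blocks_to_boundary + 1;
    }

    int main(void)
    {
    	/* 40 blocks wanted, 15 slots left before the boundary: count 16 */
    	printf("%u\n", clamp_to_boundary(40, 15));
    	return 0;
    }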
+/**
+ *	ext4_alloc_blocks: multiple allocate blocks needed for a branch
+ *	@handle: handle for this transaction
+ *	@inode: inode which needs allocated blocks
+ *	@iblock: the logical block to start allocated at
+ *	@goal: preferred physical block of allocation
+ *	@indirect_blks: the number of blocks needed to allocate for indirect
+ *			blocks
+ *	@blks: number of desired blocks
+ *	@new_blocks: on return it will store the new block numbers for
+ *	the indirect blocks(if needed) and the first direct block,
+ *	@err: on return it will store the error code
+ *
+ *	This function will return the number of blocks allocated as
+ *	requested by the passed-in parameters.
+ */
+static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
+			     ext4_lblk_t iblock, ext4_fsblk_t goal,
+			     int indirect_blks, int blks,
+			     ext4_fsblk_t new_blocks[4], int *err)
+{
+	struct ext4_allocation_request ar;
+	int target, i;
+	unsigned long count = 0, blk_allocated = 0;
+	int index = 0;
+	ext4_fsblk_t current_block = 0;
+	int ret = 0;
+
+	/*
+	 * Here we try to allocate the requested multiple blocks at once,
+	 * on a best-effort basis.
+	 * To build a branch, we should allocate blocks for
+	 * the indirect blocks(if not allocated yet), and at least
+	 * the first direct block of this branch.  That's the
+	 * minimum number of blocks we need to allocate (required)
+	 */
+	/* first we try to allocate the indirect blocks */
+	target = indirect_blks;
+	while (target > 0) {
+		count = target;
+		/* allocating blocks for indirect blocks and direct blocks */
+		current_block = ext4_new_meta_blocks(handle, inode, goal,
+						     0, &count, err);
+		if (*err)
+			goto failed_out;
+
+		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
+			EXT4_ERROR_INODE(inode,
+					 "current_block %llu + count %lu > %d!",
+					 current_block, count,
+					 EXT4_MAX_BLOCK_FILE_PHYS);
+			*err = -EIO;
+			goto failed_out;
+		}
+
+		target -= count;
+		/* allocate blocks for indirect blocks */
+		while (index < indirect_blks && count) {
+			new_blocks[index++] = current_block++;
+			count--;
+		}
+		if (count > 0) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+			printk(KERN_INFO "%s returned more blocks than "
+						"requested\n", __func__);
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	target = blks - count;
+	blk_allocated = count;
+	if (!target)
+		goto allocated;
+	/* Now allocate data blocks */
+	memset(&ar, 0, sizeof(ar));
+	ar.inode = inode;
+	ar.goal = goal;
+	ar.len = target;
+	ar.logical = iblock;
+	if (S_ISREG(inode->i_mode))
+		/* enable in-core preallocation only for regular files */
+		ar.flags = EXT4_MB_HINT_DATA;
+
+	current_block = ext4_mb_new_blocks(handle, &ar, err);
+	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
+		EXT4_ERROR_INODE(inode,
+				 "current_block %llu + ar.len %d > %d!",
+				 current_block, ar.len,
+				 EXT4_MAX_BLOCK_FILE_PHYS);
+		*err = -EIO;
+		goto failed_out;
+	}
+
+	if (*err && (target == blks)) {
+		/*
+		 * if the allocation failed and we didn't allocate
+		 * any blocks before
+		 */
+		goto failed_out;
+	}
+	if (!*err) {
+		if (target == blks) {
+			/*
+			 * save the new block number
+			 * for the first direct block
+			 */
+			new_blocks[index] = current_block;
+		}
+		blk_allocated += ar.len;
+	}
+allocated:
+	/* total number of blocks allocated for direct blocks */
+	ret = blk_allocated;
+	*err = 0;
+	return ret;
+failed_out:
+	for (i = 0; i < index; i++)
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
+	return ret;
+}
+
+/**
+ *	ext4_alloc_branch - allocate and set up a chain of blocks.
+ *	@handle: handle for this transaction
+ *	@inode: owner
+ *	@indirect_blks: number of allocated indirect blocks
+ *	@blks: number of allocated direct blocks
+ *	@goal: preferred place for allocation
+ *	@offsets: offsets (in the blocks) to store the pointers to next.
+ *	@branch: place to store the chain in.
+ *
+ *	This function allocates blocks, zeroes out all but the last one,
+ *	links them into chain and (if we are synchronous) writes them to disk.
+ *	In other words, it prepares a branch that can be spliced onto the
+ *	inode. It stores the information about that chain in the branch[], in
+ *	the same format as ext4_get_branch() would do. We are calling it after
+ *	we had read the existing part of chain and partial points to the last
+ *	triple of that (one with zero ->key). Upon the exit we have the same
+ *	picture as after the successful ext4_get_block(), except that in one
+ *	place chain is disconnected - *branch->p is still zero (we did not
+ *	set the last link), but branch->key contains the number that should
+ *	be placed into *branch->p to fill that gap.
+ *
+ *	If allocation fails we free all blocks we've allocated (and forget
+ *	their buffer_heads) and return the error value from the failed
+ *	ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ *	as described above and return 0.
+ */
+static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
+			     ext4_lblk_t iblock, int indirect_blks,
+			     int *blks, ext4_fsblk_t goal,
+			     ext4_lblk_t *offsets, Indirect *branch)
+{
+	int blocksize = inode->i_sb->s_blocksize;
+	int i, n = 0;
+	int err = 0;
+	struct buffer_head *bh;
+	int num;
+	ext4_fsblk_t new_blocks[4];
+	ext4_fsblk_t current_block;
+
+	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
+				*blks, new_blocks, &err);
+	if (err)
+		return err;
+
+	branch[0].key = cpu_to_le32(new_blocks[0]);
+	/*
+	 * metadata blocks and data blocks are allocated.
+	 */
+	for (n = 1; n <= indirect_blks;  n++) {
+		/*
+		 * Get buffer_head for parent block, zero it out
+		 * and set the pointer to new one, then send
+		 * parent to disk.
+		 */
+		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -EIO;
+			goto failed;
+		}
+
+		branch[n].bh = bh;
+		lock_buffer(bh);
+		BUFFER_TRACE(bh, "call get_create_access");
+		err = ext4_journal_get_create_access(handle, bh);
+		if (err) {
+			/* Don't brelse(bh) here; it's done in
+			 * ext4_journal_forget() below */
+			unlock_buffer(bh);
+			goto failed;
+		}
+
+		memset(bh->b_data, 0, blocksize);
+		branch[n].p = (__le32 *) bh->b_data + offsets[n];
+		branch[n].key = cpu_to_le32(new_blocks[n]);
+		*branch[n].p = branch[n].key;
+		if (n == indirect_blks) {
+			current_block = new_blocks[n];
+			/*
+			 * End of chain, update the last new metablock of
+			 * the chain to point to the new allocated
+			 * data blocks numbers
+			 */
+			for (i = 1; i < num; i++)
+				*(branch[n].p + i) = cpu_to_le32(++current_block);
+		}
+		BUFFER_TRACE(bh, "marking uptodate");
+		set_buffer_uptodate(bh);
+		unlock_buffer(bh);
+
+		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, bh);
+		if (err)
+			goto failed;
+	}
+	*blks = num;
+	return err;
+failed:
+	/* Allocation failed, free what we already allocated */
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
+	for (i = 1; i <= n ; i++) {
+		/*
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
+		 */
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
+				 EXT4_FREE_BLOCKS_FORGET);
+	}
+	for (i = n+1; i < indirect_blks; i++)
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
+
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
+
+	return err;
+}
+
+/**
+ * ext4_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
+ * @inode: owner
+ * @block: (logical) number of block we are adding
+ * @chain: chain of indirect blocks (with a missing link - see
+ *	ext4_alloc_branch)
+ * @where: location of missing link
+ * @num:   number of indirect blocks we are adding
+ * @blks:  number of direct blocks we are adding
+ *
+ * This function fills the missing link and does all housekeeping needed in
+ * inode (->i_blocks, etc.). In case of success we end up with the full
+ * chain to new block and return 0.
+ */
+static int ext4_splice_branch(handle_t *handle, struct inode *inode,
+			      ext4_lblk_t block, Indirect *where, int num,
+			      int blks)
+{
+	int i;
+	int err = 0;
+	ext4_fsblk_t current_block;
+
+	/*
+	 * If we're splicing into a [td]indirect block (as opposed to the
+	 * inode) then we need to get write access to the [td]indirect block
+	 * before the splice.
+	 */
+	if (where->bh) {
+		BUFFER_TRACE(where->bh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, where->bh);
+		if (err)
+			goto err_out;
+	}
+	/* That's it */
+
+	*where->p = where->key;
+
+	/*
+	 * Update the host buffer_head or inode to point to the just-allocated
+	 * direct blocks
+	 */
+	if (num == 0 && blks > 1) {
+		current_block = le32_to_cpu(where->key) + 1;
+		for (i = 1; i < blks; i++)
+			*(where->p + i) = cpu_to_le32(current_block++);
+	}
+
+	/* We are done with atomic stuff, now do the rest of housekeeping */
+	/* had we spliced it onto indirect block? */
+	if (where->bh) {
+		/*
+		 * If we spliced it onto an indirect block, we haven't
+		 * altered the inode.  Note however that if it is being spliced
+		 * onto an indirect block at the very end of the file (the
+		 * file is growing) then we *will* alter the inode to reflect
+		 * the new i_size.  But that is not done here - it is done in
+		 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
+		 */
+		jbd_debug(5, "splicing indirect only\n");
+		BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
+		err = ext4_handle_dirty_metadata(handle, inode, where->bh);
+		if (err)
+			goto err_out;
+	} else {
+		/*
+		 * OK, we spliced it into the inode itself on a direct block.
+		 */
+		ext4_mark_inode_dirty(handle, inode);
+		jbd_debug(5, "splicing direct\n");
+	}
+	return err;
+
+err_out:
+	for (i = 1; i <= num; i++) {
+		/*
+		 * branch[i].bh is newly allocated, so there is no
+		 * need to revoke the block, which is why we don't
+		 * need to set EXT4_FREE_BLOCKS_METADATA.
+		 */
+		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
+				 EXT4_FREE_BLOCKS_FORGET);
+	}
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
+			 blks, 0);
+
+	return err;
+}
+
+/*
+ * The ext4_ind_map_blocks() function handles non-extents inodes
+ * (i.e., using the traditional indirect/double-indirect i_blocks
+ * scheme) for ext4_map_blocks().
+ *
+ * Allocation strategy is simple: if we have to allocate something, we will
+ * have to go the whole way to leaf. So let's do it before attaching anything
+ * to tree, set linkage between the newborn blocks, write them if sync is
+ * required, recheck the path, free and repeat if check fails, otherwise
+ * set the last missing link (that will protect us from any truncate-generated
+ * removals - all blocks on the path are immune now) and possibly force the
+ * write on the parent block.
+ * That has a nice additional property: no special recovery from the failed
+ * allocations is needed - we simply release blocks and do not touch anything
+ * reachable from inode.
+ *
+ * `handle' can be NULL if create == 0.
+ *
+ * return > 0, # of blocks mapped or allocated.
+ * return = 0, if plain lookup failed.
+ * return < 0, error case.
+ *
+ * The ext4_ind_get_blocks() function should be called with
+ * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
+ * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
+ * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
+ * blocks.
+ */
+int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
+			struct ext4_map_blocks *map,
+			int flags)
+{
+	int err = -EIO;
+	ext4_lblk_t offsets[4];
+	Indirect chain[4];
+	Indirect *partial;
+	ext4_fsblk_t goal;
+	int indirect_blks;
+	int blocks_to_boundary = 0;
+	int depth;
+	int count = 0;
+	ext4_fsblk_t first_block = 0;
+
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
+	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
+	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
+	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
+				   &blocks_to_boundary);
+
+	if (depth == 0)
+		goto out;
+
+	partial = ext4_get_branch(inode, depth, offsets, chain, &err);
+
+	/* Simplest case - block found, no allocation needed */
+	if (!partial) {
+		first_block = le32_to_cpu(chain[depth - 1].key);
+		count++;
+		/*map more blocks*/
+		while (count < map->m_len && count <= blocks_to_boundary) {
+			ext4_fsblk_t blk;
+
+			blk = le32_to_cpu(*(chain[depth-1].p + count));
+
+			if (blk == first_block + count)
+				count++;
+			else
+				break;
+		}
+		goto got_it;
+	}
+
+	/* Next simple case - plain lookup or failed read of indirect block */
+	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+		goto cleanup;
+
+	/*
+	 * Okay, we need to do block allocation.
+	 */
+	goal = ext4_find_goal(inode, map->m_lblk, partial);
+
+	/* the number of blocks needed to allocate for [d,t]indirect blocks */
+	indirect_blks = (chain + depth) - partial - 1;
+
+	/*
+	 * Next look up the indirect map to count the total number of
+	 * direct blocks to allocate for this branch.
+	 */
+	count = ext4_blks_to_allocate(partial, indirect_blks,
+				      map->m_len, blocks_to_boundary);
+	/*
+	 * Block out ext4_truncate while we alter the tree
+	 */
+	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
+				&count, goal,
+				offsets + (partial - chain), partial);
+
+	/*
+	 * The ext4_splice_branch call will free and forget any buffers
+	 * on the new chain if there is a failure, but that risks using
+	 * up transaction credits, especially for bitmaps where the
+	 * credits cannot be returned.  Can we handle this somehow?  We
+	 * may need to return -EAGAIN upwards in the worst case.  --sct
+	 */
+	if (!err)
+		err = ext4_splice_branch(handle, inode, map->m_lblk,
+					 partial, indirect_blks, count);
+	if (err)
+		goto cleanup;
+
+	map->m_flags |= EXT4_MAP_NEW;
+
+	ext4_update_inode_fsync_trans(handle, inode, 1);
+got_it:
+	map->m_flags |= EXT4_MAP_MAPPED;
+	map->m_pblk = le32_to_cpu(chain[depth-1].key);
+	map->m_len = count;
+	if (count > blocks_to_boundary)
+		map->m_flags |= EXT4_MAP_BOUNDARY;
+	err = count;
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
+	while (partial > chain) {
+		BUFFER_TRACE(partial->bh, "call brelse");
+		brelse(partial->bh);
+		partial--;
+	}
+out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				map->m_pblk, map->m_len, err);
+	return err;
+}
+
+/*
+ * O_DIRECT for ext3 (or indirect map) based files
+ *
+ * If the O_DIRECT write will extend the file then add this inode to the
+ * orphan list.  So recovery will truncate it back to the original size
+ * if the machine crashes during the write.
+ *
+ * If the O_DIRECT write is instantiating holes inside i_size and the machine
+ * crashes then stale disk data _may_ be exposed inside the file. But current
+ * VFS code falls back into buffered path in that case so we are safe.
+ */
+ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
+			   const struct iovec *iov, loff_t offset,
+			   unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	handle_t *handle;
+	ssize_t ret;
+	int orphan = 0;
+	size_t count = iov_length(iov, nr_segs);
+	int retries = 0;
+
+	if (rw == WRITE) {
+		loff_t final_size = offset + count;
+
+		if (final_size > inode->i_size) {
+			/* Credits for sb + inode write */
+			handle = ext4_journal_start(inode, 2);
+			if (IS_ERR(handle)) {
+				ret = PTR_ERR(handle);
+				goto out;
+			}
+			ret = ext4_orphan_add(handle, inode);
+			if (ret) {
+				ext4_journal_stop(handle);
+				goto out;
+			}
+			orphan = 1;
+			ei->i_disksize = inode->i_size;
+			ext4_journal_stop(handle);
+		}
+	}
+
+retry:
+	if (rw == READ && ext4_should_dioread_nolock(inode))
+		ret = __blockdev_direct_IO(rw, iocb, inode,
+				 inode->i_sb->s_bdev, iov,
+				 offset, nr_segs,
+				 ext4_get_block, NULL, NULL, 0);
+	else {
+		ret = blockdev_direct_IO(rw, iocb, inode, iov,
+				 offset, nr_segs, ext4_get_block);
+
+		if (unlikely((rw & WRITE) && ret < 0)) {
+			loff_t isize = i_size_read(inode);
+			loff_t end = offset + iov_length(iov, nr_segs);
+
+			if (end > isize)
+				ext4_truncate_failed_write(inode);
+		}
+	}
+	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+		goto retry;
+
+	if (orphan) {
+		int err;
+
+		/* Credits for sb + inode write */
+		handle = ext4_journal_start(inode, 2);
+		if (IS_ERR(handle)) {
+			/* This is really bad luck. We've written the data
+			 * but cannot extend i_size. Bail out and pretend
+			 * the write failed... */
+			ret = PTR_ERR(handle);
+			if (inode->i_nlink)
+				ext4_orphan_del(NULL, inode);
+
+			goto out;
+		}
+		if (inode->i_nlink)
+			ext4_orphan_del(handle, inode);
+		if (ret > 0) {
+			loff_t end = offset + ret;
+			if (end > inode->i_size) {
+				ei->i_disksize = end;
+				i_size_write(inode, end);
+				/*
+				 * We're going to return a positive `ret'
+				 * here due to non-zero-length I/O, so there's
+				 * no way of reporting error returns from
+				 * ext4_mark_inode_dirty() to userspace.  So
+				 * ignore it.
+				 */
+				ext4_mark_inode_dirty(handle, inode);
+			}
+		}
+		err = ext4_journal_stop(handle);
+		if (ret == 0)
+			ret = err;
+	}
+out:
+	return ret;
+}
+
+/*
+ * Calculate the number of metadata blocks we need to reserve
+ * to allocate a new block at @lblock for a non-extent file
+ */
+int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
+	int blk_bits;
+
+	if (lblock < EXT4_NDIR_BLOCKS)
+		return 0;
+
+	lblock -= EXT4_NDIR_BLOCKS;
+
+	if (ei->i_da_metadata_calc_len &&
+	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+		ei->i_da_metadata_calc_len++;
+		return 0;
+	}
+	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+	ei->i_da_metadata_calc_len = 1;
+	blk_bits = order_base_2(lblock);
+	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
+}
+
+int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+{
+	int indirects;
+
+	/* if nrblocks are contiguous */
+	if (chunk) {
+		/*
+		 * With N contiguous data blocks, we need at most
+		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+		 * 2 dindirect blocks, and 1 tindirect block
+		 */
+		return DIV_ROUND_UP(nrblocks,
+				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
+	}
+	/*
+	 * if nrblocks are not contiguous, worst case: each block touches
+	 * an indirect block, and each indirect block touches a double-indirect
+	 * block, plus a triple-indirect block
+	 */
+	indirects = nrblocks * 2 + 1;
+	return indirects;
+}
+
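
For the contiguous case this bound stays small; a worked example assuming 4 KiB blocks, i.e. 1024 block addresses per indirect block (user-space sketch):

    #include <stdio.h>

    int main(void)
    {
    	int nrblocks = 2048, addr_per_block = 1024;

    	/* DIV_ROUND_UP(nrblocks, addr_per_block) indirect blocks, plus
    	 * headroom: +1 indirect, +2 dindirect, +1 tindirect = +4 */
    	int credits = (nrblocks + addr_per_block - 1) / addr_per_block + 4;

    	printf("%d contiguous blocks -> at most %d metadata blocks\n",
    	       nrblocks, credits);	/* prints 6 */
    	return 0;
    }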
+/*
+ * Truncate transactions can be complex and absolutely huge.  So we need to
+ * be able to restart the transaction at a convenient checkpoint to make
+ * sure we don't overflow the journal.
+ *
+ * start_transaction gets us a new handle for a truncate transaction,
+ * and extend_transaction tries to extend the existing one a bit.  If
+ * extend fails, we need to propagate the failure up and restart the
+ * transaction in the top-level truncate loop. --sct
+ */
+static handle_t *start_transaction(struct inode *inode)
+{
+	handle_t *result;
+
+	result = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
+	if (!IS_ERR(result))
+		return result;
+
+	ext4_std_error(inode->i_sb, PTR_ERR(result));
+	return result;
+}
+
+/*
+ * Try to extend this transaction for the purposes of truncation.
+ *
+ * Returns 0 if we managed to create more room.  If we can't create more
+ * room, and the transaction must be restarted we return 1.
+ */
+static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+{
+	if (!ext4_handle_valid(handle))
+		return 0;
+	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
+		return 0;
+	if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
+		return 0;
+	return 1;
+}
+
+/*
+ * Probably it should be a library function... search for first non-zero word
+ * or memcmp with zero_page, whatever is better for particular architecture.
+ * Linus?
+ */
+static inline int all_zeroes(__le32 *p, __le32 *q)
+{
+	while (p < q)
+		if (*p++)
+			return 0;
+	return 1;
+}
+
+/**
+ *	ext4_find_shared - find the indirect blocks for partial truncation.
+ *	@inode:	  inode in question
+ *	@depth:	  depth of the affected branch
+ *	@offsets: offsets of pointers in that branch (see ext4_block_to_path)
+ *	@chain:	  place to store the pointers to partial indirect blocks
+ *	@top:	  place to the (detached) top of branch
+ *
+ *	This is a helper function used by ext4_truncate().
+ *
+ *	When we do truncate() we may have to clean the ends of several
+ *	indirect blocks but leave the blocks themselves alive. Block is
+ *	partially truncated if some data below the new i_size is referred
+ *	from it (and it is on the path to the first completely truncated
+ *	data block, indeed).  We have to free the top of that path along
+ *	with everything to the right of the path. Since no allocation
+ *	past the truncation point is possible until ext4_truncate()
+ *	finishes, we may safely do the latter, but top of branch may
+ *	require special attention - pageout below the truncation point
+ *	might try to populate it.
+ *
+ *	We atomically detach the top of branch from the tree, store the
+ *	block number of its root in *@top, pointers to buffer_heads of
+ *	partially truncated blocks - in @chain[].bh and pointers to
+ *	their last elements that should not be removed - in
+ *	@chain[].p. Return value is the pointer to last filled element
+ *	of @chain.
+ *
+ *	The work left to caller to do the actual freeing of subtrees:
+ *		a) free the subtree starting from *@top
+ *		b) free the subtrees whose roots are stored in
+ *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
+ *		c) free the subtrees growing from the inode past the @chain[0].
+ *			(no partially truncated stuff there).  */
+
+static Indirect *ext4_find_shared(struct inode *inode, int depth,
+				  ext4_lblk_t offsets[4], Indirect chain[4],
+				  __le32 *top)
+{
+	Indirect *partial, *p;
+	int k, err;
+
+	*top = 0;
+	/* Make k index the deepest non-null offset + 1 */
+	for (k = depth; k > 1 && !offsets[k-1]; k--)
+		;
+	partial = ext4_get_branch(inode, k, offsets, chain, &err);
+	/* Writer: pointers */
+	if (!partial)
+		partial = chain + k-1;
+	/*
+	 * If the branch acquired continuation since we've looked at it -
+	 * fine, it should all survive and (new) top doesn't belong to us.
+	 */
+	if (!partial->key && *partial->p)
+		/* Writer: end */
+		goto no_top;
+	for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
+		;
+	/*
+	 * OK, we've found the last block that must survive. The rest of our
+	 * branch should be detached before unlocking. However, if that rest
+	 * of branch is all ours and does not grow immediately from the inode
+	 * it's easier to cheat and just decrement partial->p.
+	 */
+	if (p == chain + k - 1 && p > chain) {
+		p->p--;
+	} else {
+		*top = *p->p;
+		/* Nope, don't do this in ext4.  Must leave the tree intact */
+#if 0
+		*p->p = 0;
+#endif
+	}
+	/* Writer: end */
+
+	while (partial > p) {
+		brelse(partial->bh);
+		partial--;
+	}
+no_top:
+	return partial;
+}
+
+/*
+ * Zero a number of block pointers in either an inode or an indirect block.
+ * If we restart the transaction we must again get write access to the
+ * indirect block for further modification.
+ *
+ * We release `count' blocks on disk, but (last - first) may be greater
+ * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
+ */
+static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
+			     struct buffer_head *bh,
+			     ext4_fsblk_t block_to_free,
+			     unsigned long count, __le32 *first,
+			     __le32 *last)
+{
+	__le32 *p;
+	int	flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
+	int	err;
+
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+		flags |= EXT4_FREE_BLOCKS_METADATA;
+
+	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
+				   count)) {
+		EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+				 "blocks %llu len %lu",
+				 (unsigned long long) block_to_free, count);
+		return 1;
+	}
+
+	if (try_to_extend_transaction(handle, inode)) {
+		if (bh) {
+			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
+			err = ext4_handle_dirty_metadata(handle, inode, bh);
+			if (unlikely(err))
+				goto out_err;
+		}
+		err = ext4_mark_inode_dirty(handle, inode);
+		if (unlikely(err))
+			goto out_err;
+		err = ext4_truncate_restart_trans(handle, inode,
+					ext4_blocks_for_truncate(inode));
+		if (unlikely(err))
+			goto out_err;
+		if (bh) {
+			BUFFER_TRACE(bh, "retaking write access");
+			err = ext4_journal_get_write_access(handle, bh);
+			if (unlikely(err))
+				goto out_err;
+		}
+	}
+
+	for (p = first; p < last; p++)
+		*p = 0;
+
+	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
+	return 0;
+out_err:
+	ext4_std_error(inode->i_sb, err);
+	return err;
+}
+
+/**
+ * ext4_free_data - free a list of data blocks
+ * @handle:	handle for this transaction
+ * @inode:	inode we are dealing with
+ * @this_bh:	indirect buffer_head which contains *@first and *@last
+ * @first:	array of block numbers
+ * @last:	points immediately past the end of array
+ *
+ * We are freeing all blocks referred from that array (numbers are stored as
+ * little-endian 32-bit) and updating @inode->i_blocks appropriately.
+ *
+ * We accumulate contiguous runs of blocks to free.  Conveniently, if these
+ * blocks are contiguous then releasing them at one time will only affect one
+ * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
+ * actually use a lot of journal space.
+ *
+ * @this_bh will be %NULL if @first and @last point into the inode's direct
+ * block pointers.
+ */
+static void ext4_free_data(handle_t *handle, struct inode *inode,
+			   struct buffer_head *this_bh,
+			   __le32 *first, __le32 *last)
+{
+	ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
+	unsigned long count = 0;	    /* Number of blocks in the run */
+	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
+					       corresponding to
+					       block_to_free */
+	ext4_fsblk_t nr;		    /* Current block # */
+	__le32 *p;			    /* Pointer into inode/ind
+					       for current block */
+	int err = 0;
+
+	if (this_bh) {				/* For indirect block */
+		BUFFER_TRACE(this_bh, "get_write_access");
+		err = ext4_journal_get_write_access(handle, this_bh);
+		/* Important: if we can't update the indirect pointers
+		 * to the blocks, we can't free them. */
+		if (err)
+			return;
+	}
+
+	for (p = first; p < last; p++) {
+		nr = le32_to_cpu(*p);
+		if (nr) {
+			/* accumulate blocks to free if they're contiguous */
+			if (count == 0) {
+				block_to_free = nr;
+				block_to_free_p = p;
+				count = 1;
+			} else if (nr == block_to_free + count) {
+				count++;
+			} else {
+				err = ext4_clear_blocks(handle, inode, this_bh,
+						        block_to_free, count,
+						        block_to_free_p, p);
+				if (err)
+					break;
+				block_to_free = nr;
+				block_to_free_p = p;
+				count = 1;
+			}
+		}
+	}
+
+	if (!err && count > 0)
+		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+					count, block_to_free_p, p);
+	if (err < 0)
+		/* fatal error */
+		return;
+
+	if (this_bh) {
+		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
+
+		/*
+		 * The buffer head should have an attached journal head at this
+		 * point. However, if the data is corrupted and an indirect
+		 * block pointed to itself, it would have been detached when
+		 * the block was cleared. Check for this instead of OOPSing.
+		 */
+		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
+			ext4_handle_dirty_metadata(handle, inode, this_bh);
+		else
+			EXT4_ERROR_INODE(inode,
+					 "circular indirect block detected at "
+					 "block %llu",
+				(unsigned long long) this_bh->b_blocknr);
+	}
+}
+
+/**
+ *	ext4_free_branches - free an array of branches
+ *	@handle: JBD handle for this transaction
+ *	@inode:	inode we are dealing with
+ *	@parent_bh: the buffer_head which contains *@first and *@last
+ *	@first:	array of block numbers
+ *	@last:	pointer immediately past the end of array
+ *	@depth:	depth of the branches to free
+ *
+ *	We are freeing all blocks referred from these branches (numbers are
+ *	stored as little-endian 32-bit) and updating @inode->i_blocks
+ *	appropriately.
+ */
+static void ext4_free_branches(handle_t *handle, struct inode *inode,
+			       struct buffer_head *parent_bh,
+			       __le32 *first, __le32 *last, int depth)
+{
+	ext4_fsblk_t nr;
+	__le32 *p;
+
+	if (ext4_handle_is_aborted(handle))
+		return;
+
+	if (depth--) {
+		struct buffer_head *bh;
+		int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+		p = last;
+		while (--p >= first) {
+			nr = le32_to_cpu(*p);
+			if (!nr)
+				continue;		/* A hole */
+
+			if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
+						   nr, 1)) {
+				EXT4_ERROR_INODE(inode,
+						 "invalid indirect mapped "
+						 "block %lu (level %d)",
+						 (unsigned long) nr, depth);
+				break;
+			}
+
+			/* Go read the buffer for the next level down */
+			bh = sb_bread(inode->i_sb, nr);
+
+			/*
+			 * A read failure? Report error and clear slot
+			 * (should be rare).
+			 */
+			if (!bh) {
+				EXT4_ERROR_INODE_BLOCK(inode, nr,
+						       "Read failure");
+				continue;
+			}
+
+			/* This zaps the entire block.  Bottom up. */
+			BUFFER_TRACE(bh, "free child branches");
+			ext4_free_branches(handle, inode, bh,
+					(__le32 *) bh->b_data,
+					(__le32 *) bh->b_data + addr_per_block,
+					depth);
+			brelse(bh);
+
+			/*
+			 * Everything below this pointer has been
+			 * released.  Now let this top-of-subtree go.
+			 *
+			 * We want the freeing of this indirect block to be
+			 * atomic in the journal with the updating of the
+			 * bitmap block which owns it.  So make some room in
+			 * the journal.
+			 *
+			 * We zero the parent pointer *after* freeing its
+			 * pointee in the bitmaps, so if extend_transaction()
+			 * for some reason fails to put the bitmap changes and
+			 * the release into the same transaction, recovery
+			 * will merely complain about releasing a free block,
+			 * rather than leaking blocks.
+			 */
+			if (ext4_handle_is_aborted(handle))
+				return;
+			if (try_to_extend_transaction(handle, inode)) {
+				ext4_mark_inode_dirty(handle, inode);
+				ext4_truncate_restart_trans(handle, inode,
+					    ext4_blocks_for_truncate(inode));
+			}
+
+			/*
+			 * The forget flag here is critical because if
+			 * we are journaling (and not doing data
+			 * journaling), we have to make sure a revoke
+			 * record is written to prevent the journal
+			 * replay from overwriting the (former)
+			 * indirect block if it gets reallocated as a
+			 * data block.  This must happen in the same
+			 * transaction where the data blocks are
+			 * actually freed.
+			 */
+			ext4_free_blocks(handle, inode, NULL, nr, 1,
+					 EXT4_FREE_BLOCKS_METADATA|
+					 EXT4_FREE_BLOCKS_FORGET);
+
+			if (parent_bh) {
+				/*
+				 * The block which we have just freed is
+				 * pointed to by an indirect block: journal it
+				 */
+				BUFFER_TRACE(parent_bh, "get_write_access");
+				if (!ext4_journal_get_write_access(handle,
+								   parent_bh)){
+					*p = 0;
+					BUFFER_TRACE(parent_bh,
+					"call ext4_handle_dirty_metadata");
+					ext4_handle_dirty_metadata(handle,
+								   inode,
+								   parent_bh);
+				}
+			}
+		}
+	} else {
+		/* We have reached the bottom of the tree. */
+		BUFFER_TRACE(parent_bh, "free data blocks");
+		ext4_free_data(handle, inode, parent_bh, first, last);
+	}
+}
+
+void ext4_ind_truncate(struct inode *inode)
+{
+	handle_t *handle;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	__le32 *i_data = ei->i_data;
+	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+	struct address_space *mapping = inode->i_mapping;
+	ext4_lblk_t offsets[4];
+	Indirect chain[4];
+	Indirect *partial;
+	__le32 nr = 0;
+	int n = 0;
+	ext4_lblk_t last_block, max_block;
+	unsigned blocksize = inode->i_sb->s_blocksize;
+
+	handle = start_transaction(inode);
+	if (IS_ERR(handle))
+		return;		/* AKPM: return what? */
+
+	last_block = (inode->i_size + blocksize-1)
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+
+	if (inode->i_size & (blocksize - 1))
+		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
+			goto out_stop;
+
+	if (last_block != max_block) {
+		n = ext4_block_to_path(inode, last_block, offsets, NULL);
+		if (n == 0)
+			goto out_stop;	/* error */
+	}
+
+	/*
+	 * OK.  This truncate is going to happen.  We add the inode to the
+	 * orphan list, so that if this truncate spans multiple transactions,
+	 * and we crash, we will resume the truncate when the filesystem
+	 * recovers.  It also marks the inode dirty, to catch the new size.
+	 *
+	 * Implication: the file must always be in a sane, consistent
+	 * truncatable state while each transaction commits.
+	 */
+	if (ext4_orphan_add(handle, inode))
+		goto out_stop;
+
+	/*
+	 * From here we block out all ext4_get_block() callers who want to
+	 * modify the block allocation tree.
+	 */
+	down_write(&ei->i_data_sem);
+
+	ext4_discard_preallocations(inode);
+
+	/*
+	 * The orphan list entry will now protect us from any crash which
+	 * occurs before the truncate completes, so it is now safe to propagate
+	 * the new, shorter inode size (held for now in i_size) into the
+	 * on-disk inode. We do this via i_disksize, which is the value which
+	 * ext4 *really* writes onto the disk inode.
+	 */
+	ei->i_disksize = inode->i_size;
+
+	if (last_block == max_block) {
+		/*
+		 * It is unnecessary to free any data blocks if last_block is
+		 * equal to the indirect block limit.
+		 */
+		goto out_unlock;
+	} else if (n == 1) {		/* direct blocks */
+		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
+			       i_data + EXT4_NDIR_BLOCKS);
+		goto do_indirects;
+	}
+
+	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
+	/* Kill the top of shared branch (not detached) */
+	if (nr) {
+		if (partial == chain) {
+			/* Shared branch grows from the inode */
+			ext4_free_branches(handle, inode, NULL,
+					   &nr, &nr+1, (chain+n-1) - partial);
+			*partial->p = 0;
+			/*
+			 * We mark the inode dirty prior to restart,
+			 * and prior to stop.  No need for it here.
+			 */
+		} else {
+			/* Shared branch grows from an indirect block */
+			BUFFER_TRACE(partial->bh, "get_write_access");
+			ext4_free_branches(handle, inode, partial->bh,
+					partial->p,
+					partial->p+1, (chain+n-1) - partial);
+		}
+	}
+	/* Clear the ends of indirect blocks on the shared branch */
+	while (partial > chain) {
+		ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
+				   (__le32*)partial->bh->b_data+addr_per_block,
+				   (chain+n-1) - partial);
+		BUFFER_TRACE(partial->bh, "call brelse");
+		brelse(partial->bh);
+		partial--;
+	}
+do_indirects:
+	/* Kill the remaining (whole) subtrees */
+	switch (offsets[0]) {
+	default:
+		nr = i_data[EXT4_IND_BLOCK];
+		if (nr) {
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
+			i_data[EXT4_IND_BLOCK] = 0;
+		}
+	case EXT4_IND_BLOCK:
+		nr = i_data[EXT4_DIND_BLOCK];
+		if (nr) {
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
+			i_data[EXT4_DIND_BLOCK] = 0;
+		}
+	case EXT4_DIND_BLOCK:
+		nr = i_data[EXT4_TIND_BLOCK];
+		if (nr) {
+			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
+			i_data[EXT4_TIND_BLOCK] = 0;
+		}
+	case EXT4_TIND_BLOCK:
+		;
+	}
+
+out_unlock:
+	up_write(&ei->i_data_sem);
+	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+	ext4_mark_inode_dirty(handle, inode);
+
+	/*
+	 * In a multi-transaction truncate, we only make the final transaction
+	 * synchronous
+	 */
+	if (IS_SYNC(inode))
+		ext4_handle_sync(handle);
+out_stop:
+	/*
+	 * If this was a simple ftruncate(), and the file will remain alive
+	 * then we need to clear up the orphan record which we created above.
+	 * However, if this was a real unlink then we were called by
+	 * ext4_delete_inode(), and we allow that function to clean up the
+	 * orphan info for us.
+	 */
+	if (inode->i_nlink)
+		ext4_orphan_del(handle, inode);
+
+	ext4_journal_stop(handle);
+	trace_ext4_truncate_exit(inode);
+}
+

+ 37 - 1559
fs/ext4/inode.c

@@ -12,10 +12,6 @@
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *
- *  Goal-directed block allocation by Stephen Tweedie
- *	(sct@redhat.com), 1993, 1998
- *  Big-endian to little-endian byte-swapping/bitmaps by
- *        David S. Miller (davem@caip.rutgers.edu), 1995
  *  64-bit file support on 64-bit platforms by Jakub Jelinek
  *	(jj@sunsite.ms.mff.cuni.cz)
  *
@@ -47,6 +43,7 @@
 #include "xattr.h"
 #include "xattr.h"
 #include "acl.h"
 #include "acl.h"
 #include "ext4_extents.h"
 #include "ext4_extents.h"
+#include "truncate.h"
 
 #include <trace/events/ext4.h>
 
@@ -88,72 +85,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
 	return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
 }
 
-/*
- * Work out how many blocks we need to proceed with the next chunk of a
- * truncate transaction.
- */
-static unsigned long blocks_for_truncate(struct inode *inode)
-{
-	ext4_lblk_t needed;
-
-	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
-
-	/* Give ourselves just enough room to cope with inodes in which
-	 * i_blocks is corrupt: we've seen disk corruptions in the past
-	 * which resulted in random data in an inode which looked enough
-	 * like a regular file for ext4 to try to delete it.  Things
-	 * will go a bit crazy if that happens, but at least we should
-	 * try not to panic the whole kernel. */
-	if (needed < 2)
-		needed = 2;
-
-	/* But we need to bound the transaction so we don't overflow the
-	 * journal. */
-	if (needed > EXT4_MAX_TRANS_DATA)
-		needed = EXT4_MAX_TRANS_DATA;
-
-	return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
-}
-
-/*
- * Truncate transactions can be complex and absolutely huge.  So we need to
- * be able to restart the transaction at a conventient checkpoint to make
- * sure we don't overflow the journal.
- *
- * start_transaction gets us a new handle for a truncate transaction,
- * and extend_transaction tries to extend the existing one a bit.  If
- * extend fails, we need to propagate the failure up and restart the
- * transaction in the top-level truncate loop. --sct
- */
-static handle_t *start_transaction(struct inode *inode)
-{
-	handle_t *result;
-
-	result = ext4_journal_start(inode, blocks_for_truncate(inode));
-	if (!IS_ERR(result))
-		return result;
-
-	ext4_std_error(inode->i_sb, PTR_ERR(result));
-	return result;
-}
-
-/*
- * Try to extend this transaction for the purposes of truncation.
- *
- * Returns 0 if we managed to create more room.  If we can't create more
- * room, and the transaction must be restarted we return 1.
- */
-static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
-{
-	if (!ext4_handle_valid(handle))
-		return 0;
-	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
-		return 0;
-	if (!ext4_journal_extend(handle, blocks_for_truncate(inode)))
-		return 0;
-	return 1;
-}
-
 /*
  * Restart the transaction associated with *handle.  This does a commit,
  * so before we call here everything must be consistently dirtied against
@@ -190,6 +121,33 @@ void ext4_evict_inode(struct inode *inode)
 
 	trace_ext4_evict_inode(inode);
 	if (inode->i_nlink) {
+		/*
+		 * When journalling data dirty buffers are tracked only in the
+		 * journal. So although mm thinks everything is clean and
+		 * ready for reaping the inode might still have some pages to
+		 * write in the running transaction or waiting to be
+		 * checkpointed. Thus calling jbd2_journal_invalidatepage()
+		 * (via truncate_inode_pages()) to discard these buffers can
+		 * cause data loss. Also even if we did not discard these
+		 * buffers, we would have no way to find them after the inode
+		 * is reaped and thus user could see stale data if he tries to
+		 * read them before the transaction is checkpointed. So be
+		 * careful and force everything to disk here... We use
+		 * ei->i_datasync_tid to store the newest transaction
+		 * containing inode's data.
+		 *
+		 * Note that directories do not have this problem because they
+		 * don't use page cache.
+		 */
+		if (ext4_should_journal_data(inode) &&
+		    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+			journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+			tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
+
+			jbd2_log_start_commit(journal, commit_tid);
+			jbd2_log_wait_commit(journal, commit_tid);
+			filemap_write_and_wait(&inode->i_data);
+		}
 		truncate_inode_pages(&inode->i_data, 0);
 		goto no_delete;
 	}
@@ -204,7 +162,7 @@ void ext4_evict_inode(struct inode *inode)
 	if (is_bad_inode(inode))
 		goto no_delete;
 
-	handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
+	handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
 	if (IS_ERR(handle)) {
 		ext4_std_error(inode->i_sb, PTR_ERR(handle));
 		/*
@@ -277,793 +235,6 @@ no_delete:
 	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
 }
 
-typedef struct {
-	__le32	*p;
-	__le32	key;
-	struct buffer_head *bh;
-} Indirect;
-
-static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
-{
-	p->key = *(p->p = v);
-	p->bh = bh;
-}
-
-/**
- *	ext4_block_to_path - parse the block number into array of offsets
- *	@inode: inode in question (we are only interested in its superblock)
- *	@i_block: block number to be parsed
- *	@offsets: array to store the offsets in
- *	@boundary: set this non-zero if the referred-to block is likely to be
- *	       followed (on disk) by an indirect block.
- *
- *	To store the locations of file's data ext4 uses a data structure common
- *	for UNIX filesystems - tree of pointers anchored in the inode, with
- *	data blocks at leaves and indirect blocks in intermediate nodes.
- *	This function translates the block number into path in that tree -
- *	return value is the path length and @offsets[n] is the offset of
- *	pointer to (n+1)th node in the nth one. If @block is out of range
- *	(negative or too large) warning is printed and zero returned.
- *
- *	Note: function doesn't find node addresses, so no IO is needed. All
- *	we need to know is the capacity of indirect blocks (taken from the
- *	inode->i_sb).
- */
-
-/*
- * Portability note: the last comparison (check that we fit into triple
- * indirect block) is spelled differently, because otherwise on an
- * architecture with 32-bit longs and 8Kb pages we might get into trouble
- * if our filesystem had 8Kb blocks. We might use long long, but that would
- * kill us on x86. Oh, well, at least the sign propagation does not matter -
- * i_block would have to be negative in the very beginning, so we would not
- * get there at all.
- */
-
-static int ext4_block_to_path(struct inode *inode,
-			      ext4_lblk_t i_block,
-			      ext4_lblk_t offsets[4], int *boundary)
-{
-	int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
-	const long direct_blocks = EXT4_NDIR_BLOCKS,
-		indirect_blocks = ptrs,
-		double_blocks = (1 << (ptrs_bits * 2));
-	int n = 0;
-	int final = 0;
-
-	if (i_block < direct_blocks) {
-		offsets[n++] = i_block;
-		final = direct_blocks;
-	} else if ((i_block -= direct_blocks) < indirect_blocks) {
-		offsets[n++] = EXT4_IND_BLOCK;
-		offsets[n++] = i_block;
-		final = ptrs;
-	} else if ((i_block -= indirect_blocks) < double_blocks) {
-		offsets[n++] = EXT4_DIND_BLOCK;
-		offsets[n++] = i_block >> ptrs_bits;
-		offsets[n++] = i_block & (ptrs - 1);
-		final = ptrs;
-	} else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) {
-		offsets[n++] = EXT4_TIND_BLOCK;
-		offsets[n++] = i_block >> (ptrs_bits * 2);
-		offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1);
-		offsets[n++] = i_block & (ptrs - 1);
-		final = ptrs;
-	} else {
-		ext4_warning(inode->i_sb, "block %lu > max in inode %lu",
-			     i_block + direct_blocks +
-			     indirect_blocks + double_blocks, inode->i_ino);
-	}
-	if (boundary)
-		*boundary = final - 1 - (i_block & (ptrs - 1));
-	return n;
-}
-
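
A worked example helps here. Assuming 4 KiB blocks (12 direct slots, 1024 pointers per indirect block, so ptrs_bits == 10), logical block 5000 lands in the double-indirect tree; a small userspace rework of the arithmetic:

#include <stdio.h>

/* Userspace rework of the path computation above, assuming 4 KiB
 * blocks: 12 direct slots, 1024 pointers per indirect block. */
int main(void)
{
	long blk = 5000, direct = 12, ptrs = 1024;

	blk -= direct;		/* skip the 12 direct slots          */
	blk -= ptrs;		/* skip the single-indirect range    */
	/* path is { EXT4_DIND_BLOCK, blk >> 10, blk & 1023 }       */
	printf("DIND path: %ld, %ld\n", blk >> 10, blk & (ptrs - 1));
	return 0;		/* prints "DIND path: 3, 892"        */
}
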
-static int __ext4_check_blockref(const char *function, unsigned int line,
-				 struct inode *inode,
-				 __le32 *p, unsigned int max)
-{
-	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
-	__le32 *bref = p;
-	unsigned int blk;
-
-	while (bref < p+max) {
-		blk = le32_to_cpu(*bref++);
-		if (blk &&
-		    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
-						    blk, 1))) {
-			es->s_last_error_block = cpu_to_le64(blk);
-			ext4_error_inode(inode, function, line, blk,
-					 "invalid block");
-			return -EIO;
-		}
-	}
-	return 0;
-}
-
-
-#define ext4_check_indirect_blockref(inode, bh)                         \
-	__ext4_check_blockref(__func__, __LINE__, inode,		\
-			      (__le32 *)(bh)->b_data,			\
-			      EXT4_ADDR_PER_BLOCK((inode)->i_sb))
-
-#define ext4_check_inode_blockref(inode)                                \
-	__ext4_check_blockref(__func__, __LINE__, inode,		\
-			      EXT4_I(inode)->i_data,			\
-			      EXT4_NDIR_BLOCKS)
-
-/**
- *	ext4_get_branch - read the chain of indirect blocks leading to data
- *	@inode: inode in question
- *	@depth: depth of the chain (1 - direct pointer, etc.)
- *	@offsets: offsets of pointers in inode/indirect blocks
- *	@chain: place to store the result
- *	@err: here we store the error value
- *
- *	Function fills the array of triples <key, p, bh> and returns %NULL
- *	if everything went OK or the pointer to the last filled triple
- *	(incomplete one) otherwise. Upon the return chain[i].key contains
- *	the number of (i+1)-th block in the chain (as it is stored in memory,
- *	i.e. little-endian 32-bit), chain[i].p contains the address of that
- *	number (it points into struct inode for i==0 and into the bh->b_data
- *	for i>0) and chain[i].bh points to the buffer_head of i-th indirect
- *	block for i>0 and NULL for i==0. In other words, it holds the block
- *	numbers of the chain, addresses they were taken from (and where we can
- *	verify that chain did not change) and buffer_heads hosting these
- *	numbers.
- *
- *	Function stops when it stumbles upon zero pointer (absent block)
- *		(pointer to last triple returned, *@err == 0)
- *	or when it gets an IO error reading an indirect block
- *		(ditto, *@err == -EIO)
- *	or when it reads all @depth-1 indirect blocks successfully and finds
- *	the whole chain, all way to the data (returns %NULL, *err == 0).
- *
- *      Need to be called with
- *      down_read(&EXT4_I(inode)->i_data_sem)
- */
-static Indirect *ext4_get_branch(struct inode *inode, int depth,
-				 ext4_lblk_t  *offsets,
-				 Indirect chain[4], int *err)
-{
-	struct super_block *sb = inode->i_sb;
-	Indirect *p = chain;
-	struct buffer_head *bh;
-
-	*err = 0;
-	/* i_data is not going away, no lock needed */
-	add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets);
-	if (!p->key)
-		goto no_block;
-	while (--depth) {
-		bh = sb_getblk(sb, le32_to_cpu(p->key));
-		if (unlikely(!bh))
-			goto failure;
-
-		if (!bh_uptodate_or_lock(bh)) {
-			if (bh_submit_read(bh) < 0) {
-				put_bh(bh);
-				goto failure;
-			}
-			/* validate block references */
-			if (ext4_check_indirect_blockref(inode, bh)) {
-				put_bh(bh);
-				goto failure;
-			}
-		}
-
-		add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets);
-		/* Reader: end */
-		if (!p->key)
-			goto no_block;
-	}
-	return NULL;
-
-failure:
-	*err = -EIO;
-no_block:
-	return p;
-}
-
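
The return convention documented above is easiest to see from the caller's side. A hedged, kernel-context sketch (not standalone) of how ext4_ind_map_blocks(), further down in this patch, consumes the result:

/* Sketch: interpreting ext4_get_branch()'s result, as the mapping
 * code below does.  NULL means the whole chain was present. */
Indirect chain[4], *partial;
ext4_fsblk_t first_block;
int err;

partial = ext4_get_branch(inode, depth, offsets, chain, &err);
if (!partial) {
	/* fully mapped: chain[depth - 1].key holds the data block */
	first_block = le32_to_cpu(chain[depth - 1].key);
} else if (err == -EIO) {
	/* an indirect block could not be read */
} else {
	/* hole: partial points at the first missing link (key == 0) */
}
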
-/**
- *	ext4_find_near - find a place for allocation with sufficient locality
- *	@inode: owner
- *	@ind: descriptor of indirect block.
- *
- *	This function returns the preferred place for block allocation.
- *	It is used when heuristic for sequential allocation fails.
- *	Rules are:
- *	  + if there is a block to the left of our position - allocate near it.
- *	  + if pointer will live in indirect block - allocate near that block.
- *	  + if pointer will live in inode - allocate in the same
- *	    cylinder group.
- *
- * In the latter case we colour the starting block by the callers PID to
- * prevent it from clashing with concurrent allocations for a different inode
- * in the same block group.   The PID is used here so that functionally related
- * files will be close-by on-disk.
- *
- *	Caller must make sure that @ind is valid and will stay that way.
- */
-static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	__le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data;
-	__le32 *p;
-	ext4_fsblk_t bg_start;
-	ext4_fsblk_t last_block;
-	ext4_grpblk_t colour;
-	ext4_group_t block_group;
-	int flex_size = ext4_flex_bg_size(EXT4_SB(inode->i_sb));
-
-	/* Try to find previous block */
-	for (p = ind->p - 1; p >= start; p--) {
-		if (*p)
-			return le32_to_cpu(*p);
-	}
-
-	/* No such thing, so let's try location of indirect block */
-	if (ind->bh)
-		return ind->bh->b_blocknr;
-
-	/*
-	 * It is going to be referred to from the inode itself? OK, just put it
-	 * into the same cylinder group then.
-	 */
-	block_group = ei->i_block_group;
-	if (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) {
-		block_group &= ~(flex_size-1);
-		if (S_ISREG(inode->i_mode))
-			block_group++;
-	}
-	bg_start = ext4_group_first_block_no(inode->i_sb, block_group);
-	last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;
-
-	/*
-	 * If we are doing delayed allocation, we don't need to take
-	 * colour into account.
-	 */
-	if (test_opt(inode->i_sb, DELALLOC))
-		return bg_start;
-
-	if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
-		colour = (current->pid % 16) *
-			(EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
-	else
-		colour = (current->pid % 16) * ((last_block - bg_start) / 16);
-	return bg_start + colour;
-}
-
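
The colouring rule at the end is simple modular arithmetic; a minimal restatement with hypothetical names:

/* Hypothetical restatement of the PID colouring above: 16 colour
 * slots per group, so with 32768 blocks per group a process with
 * pid % 16 == 5 starts 5 * 2048 = 10240 blocks into the group. */
static unsigned long coloured_start(unsigned long bg_start,
				    unsigned long blocks_per_group, int pid)
{
	return bg_start + (pid % 16) * (blocks_per_group / 16);
}
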
-/**
- *	ext4_find_goal - find a preferred place for allocation.
- *	@inode: owner
- *	@block:  block we want
- *	@partial: pointer to the last triple within a chain
- *
- *	Normally this function find the preferred place for block allocation,
- *	returns it.
- *	Because this is only used for non-extent files, we limit the block nr
- *	to 32 bits.
- */
-static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
-				   Indirect *partial)
-{
-	ext4_fsblk_t goal;
-
-	/*
-	 * XXX need to get goal block from mballoc's data structures
-	 */
-
-	goal = ext4_find_near(inode, partial);
-	goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
-	return goal;
-}
-
-/**
- *	ext4_blks_to_allocate - Look up the block map and count the number
- *	of direct blocks need to be allocated for the given branch.
- *
- *	@branch: chain of indirect blocks
- *	@k: number of blocks need for indirect blocks
- *	@blks: number of data blocks to be mapped.
- *	@blocks_to_boundary:  the offset in the indirect block
- *
- *	return the total number of blocks to be allocated, including the
- *	direct and indirect blocks.
- */
-static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
-				 int blocks_to_boundary)
-{
-	unsigned int count = 0;
-
-	/*
-	 * Simple case, [t,d]Indirect block(s) have not been allocated yet,
-	 * so clearly the blocks on that path have not been allocated
-	 */
-	if (k > 0) {
-		/* right now we don't handle cross boundary allocation */
-		if (blks < blocks_to_boundary + 1)
-			count += blks;
-		else
-			count += blocks_to_boundary + 1;
-		return count;
-	}
-
-	count++;
-	while (count < blks && count <= blocks_to_boundary &&
-		le32_to_cpu(*(branch[0].p + count)) == 0) {
-		count++;
-	}
-	return count;
-}
-
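
For the k == 0 case the loop claims only the contiguous run of unallocated slots before the boundary; a small userspace check of that behaviour with hypothetical values:

#include <stdio.h>

/* Userspace rework of the k == 0 branch above: count the run of
 * zero pointers, capped by the request size and the boundary. */
static unsigned count_run(const unsigned *p, unsigned blks, int boundary)
{
	unsigned count = 1;		/* the first direct block is implied */

	while (count < blks && (int)count <= boundary && p[count] == 0)
		count++;
	return count;
}

int main(void)
{
	unsigned map[] = { 0, 0, 0, 42, 0, 0, 0, 0 };

	printf("%u\n", count_run(map, 8, 5));	/* prints 3 */
	return 0;
}
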
-/**
- *	ext4_alloc_blocks: multiple allocate blocks needed for a branch
- *	@handle: handle for this transaction
- *	@inode: inode which needs allocated blocks
- *	@iblock: the logical block to start allocated at
- *	@goal: preferred physical block of allocation
- *	@indirect_blks: the number of blocks need to allocate for indirect
- *			blocks
- *	@blks: number of desired blocks
- *	@new_blocks: on return it will store the new block numbers for
- *	the indirect blocks(if needed) and the first direct block,
- *	@err: on return it will store the error code
- *
- *	This function will return the number of blocks allocated as
- *	requested by the passed-in parameters.
- */
-static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
-			     ext4_lblk_t iblock, ext4_fsblk_t goal,
-			     int indirect_blks, int blks,
-			     ext4_fsblk_t new_blocks[4], int *err)
-{
-	struct ext4_allocation_request ar;
-	int target, i;
-	unsigned long count = 0, blk_allocated = 0;
-	int index = 0;
-	ext4_fsblk_t current_block = 0;
-	int ret = 0;
-
-	/*
-	 * Here we try to allocate the requested multiple blocks at once,
-	 * on a best-effort basis.
-	 * To build a branch, we should allocate blocks for
-	 * the indirect blocks(if not allocated yet), and at least
-	 * the first direct block of this branch.  That's the
-	 * minimum number of blocks we need to allocate (required)
-	 */
-	/* first we try to allocate the indirect blocks */
-	target = indirect_blks;
-	while (target > 0) {
-		count = target;
-		/* allocating blocks for indirect blocks and direct blocks */
-		current_block = ext4_new_meta_blocks(handle, inode, goal,
-						     0, &count, err);
-		if (*err)
-			goto failed_out;
-
-		if (unlikely(current_block + count > EXT4_MAX_BLOCK_FILE_PHYS)) {
-			EXT4_ERROR_INODE(inode,
-					 "current_block %llu + count %lu > %d!",
-					 current_block, count,
-					 EXT4_MAX_BLOCK_FILE_PHYS);
-			*err = -EIO;
-			goto failed_out;
-		}
-
-		target -= count;
-		/* allocate blocks for indirect blocks */
-		while (index < indirect_blks && count) {
-			new_blocks[index++] = current_block++;
-			count--;
-		}
-		if (count > 0) {
-			/*
-			 * save the new block number
-			 * for the first direct block
-			 */
-			new_blocks[index] = current_block;
-			printk(KERN_INFO "%s returned more blocks than "
-						"requested\n", __func__);
-			WARN_ON(1);
-			break;
-		}
-	}
-
-	target = blks - count ;
-	blk_allocated = count;
-	if (!target)
-		goto allocated;
-	/* Now allocate data blocks */
-	memset(&ar, 0, sizeof(ar));
-	ar.inode = inode;
-	ar.goal = goal;
-	ar.len = target;
-	ar.logical = iblock;
-	if (S_ISREG(inode->i_mode))
-		/* enable in-core preallocation only for regular files */
-		ar.flags = EXT4_MB_HINT_DATA;
-
-	current_block = ext4_mb_new_blocks(handle, &ar, err);
-	if (unlikely(current_block + ar.len > EXT4_MAX_BLOCK_FILE_PHYS)) {
-		EXT4_ERROR_INODE(inode,
-				 "current_block %llu + ar.len %d > %d!",
-				 current_block, ar.len,
-				 EXT4_MAX_BLOCK_FILE_PHYS);
-		*err = -EIO;
-		goto failed_out;
-	}
-
-	if (*err && (target == blks)) {
-		/*
-		 * if the allocation failed and we didn't allocate
-		 * any blocks before
-		 */
-		goto failed_out;
-	}
-	if (!*err) {
-		if (target == blks) {
-			/*
-			 * save the new block number
-			 * for the first direct block
-			 */
-			new_blocks[index] = current_block;
-		}
-		blk_allocated += ar.len;
-	}
-allocated:
-	/* total number of blocks allocated for direct blocks */
-	ret = blk_allocated;
-	*err = 0;
-	return ret;
-failed_out:
-	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-	return ret;
-}
-
-/**
- *	ext4_alloc_branch - allocate and set up a chain of blocks.
- *	@handle: handle for this transaction
- *	@inode: owner
- *	@indirect_blks: number of allocated indirect blocks
- *	@blks: number of allocated direct blocks
- *	@goal: preferred place for allocation
- *	@offsets: offsets (in the blocks) to store the pointers to next.
- *	@branch: place to store the chain in.
- *
- *	This function allocates blocks, zeroes out all but the last one,
- *	links them into chain and (if we are synchronous) writes them to disk.
- *	In other words, it prepares a branch that can be spliced onto the
- *	inode. It stores the information about that chain in the branch[], in
- *	the same format as ext4_get_branch() would do. We are calling it after
- *	we had read the existing part of chain and partial points to the last
- *	triple of that (one with zero ->key). Upon the exit we have the same
- *	picture as after the successful ext4_get_block(), except that in one
- *	place chain is disconnected - *branch->p is still zero (we did not
- *	set the last link), but branch->key contains the number that should
- *	be placed into *branch->p to fill that gap.
- *
- *	If allocation fails we free all blocks we've allocated (and forget
- *	their buffer_heads) and return the error value the from failed
- *	ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
- *	as described above and return 0.
- */
-static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
-			     ext4_lblk_t iblock, int indirect_blks,
-			     int *blks, ext4_fsblk_t goal,
-			     ext4_lblk_t *offsets, Indirect *branch)
-{
-	int blocksize = inode->i_sb->s_blocksize;
-	int i, n = 0;
-	int err = 0;
-	struct buffer_head *bh;
-	int num;
-	ext4_fsblk_t new_blocks[4];
-	ext4_fsblk_t current_block;
-
-	num = ext4_alloc_blocks(handle, inode, iblock, goal, indirect_blks,
-				*blks, new_blocks, &err);
-	if (err)
-		return err;
-
-	branch[0].key = cpu_to_le32(new_blocks[0]);
-	/*
-	 * metadata blocks and data blocks are allocated.
-	 */
-	for (n = 1; n <= indirect_blks;  n++) {
-		/*
-		 * Get buffer_head for parent block, zero it out
-		 * and set the pointer to new one, then send
-		 * parent to disk.
-		 */
-		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
-		if (unlikely(!bh)) {
-			err = -EIO;
-			goto failed;
-		}
-
-		branch[n].bh = bh;
-		lock_buffer(bh);
-		BUFFER_TRACE(bh, "call get_create_access");
-		err = ext4_journal_get_create_access(handle, bh);
-		if (err) {
-			/* Don't brelse(bh) here; it's done in
-			 * ext4_journal_forget() below */
-			unlock_buffer(bh);
-			goto failed;
-		}
-
-		memset(bh->b_data, 0, blocksize);
-		branch[n].p = (__le32 *) bh->b_data + offsets[n];
-		branch[n].key = cpu_to_le32(new_blocks[n]);
-		*branch[n].p = branch[n].key;
-		if (n == indirect_blks) {
-			current_block = new_blocks[n];
-			/*
-			 * End of chain, update the last new metablock of
-			 * the chain to point to the new allocated
-			 * data blocks numbers
-			 */
-			for (i = 1; i < num; i++)
-				*(branch[n].p + i) = cpu_to_le32(++current_block);
-		}
-		BUFFER_TRACE(bh, "marking uptodate");
-		set_buffer_uptodate(bh);
-		unlock_buffer(bh);
-
-		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, bh);
-		if (err)
-			goto failed;
-	}
-	*blks = num;
-	return err;
-failed:
-	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
-	for (i = 1; i <= n ; i++) {
-		/*
-		 * branch[i].bh is newly allocated, so there is no
-		 * need to revoke the block, which is why we don't
-		 * need to set EXT4_FREE_BLOCKS_METADATA.
-		 */
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
-				 EXT4_FREE_BLOCKS_FORGET);
-	}
-	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
-
-	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
-
-	return err;
-}
-
-/**
- * ext4_splice_branch - splice the allocated branch onto inode.
- * @handle: handle for this transaction
- * @inode: owner
- * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- *	ext4_alloc_branch)
- * @where: location of missing link
- * @num:   number of indirect blocks we are adding
- * @blks:  number of direct blocks we are adding
- *
- * This function fills the missing link and does all housekeeping needed in
- * inode (->i_blocks, etc.). In case of success we end up with the full
- * chain to new block and return 0.
- */
-static int ext4_splice_branch(handle_t *handle, struct inode *inode,
-			      ext4_lblk_t block, Indirect *where, int num,
-			      int blks)
-{
-	int i;
-	int err = 0;
-	ext4_fsblk_t current_block;
-
-	/*
-	 * If we're splicing into a [td]indirect block (as opposed to the
-	 * inode) then we need to get write access to the [td]indirect block
-	 * before the splice.
-	 */
-	if (where->bh) {
-		BUFFER_TRACE(where->bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, where->bh);
-		if (err)
-			goto err_out;
-	}
-	/* That's it */
-
-	*where->p = where->key;
-
-	/*
-	 * Update the host buffer_head or inode to point to the newly
-	 * allocated direct blocks
-	 */
-	if (num == 0 && blks > 1) {
-		current_block = le32_to_cpu(where->key) + 1;
-		for (i = 1; i < blks; i++)
-			*(where->p + i) = cpu_to_le32(current_block++);
-	}
-
-	/* We are done with atomic stuff, now do the rest of housekeeping */
-	/* had we spliced it onto indirect block? */
-	if (where->bh) {
-		/*
-		 * If we spliced it onto an indirect block, we haven't
-		 * altered the inode.  Note however that if it is being spliced
-		 * onto an indirect block at the very end of the file (the
-		 * file is growing) then we *will* alter the inode to reflect
-		 * the new i_size.  But that is not done here - it is done in
-		 * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode.
-		 */
-		jbd_debug(5, "splicing indirect only\n");
-		BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
-		err = ext4_handle_dirty_metadata(handle, inode, where->bh);
-		if (err)
-			goto err_out;
-	} else {
-		/*
-		 * OK, we spliced it into the inode itself on a direct block.
-		 */
-		ext4_mark_inode_dirty(handle, inode);
-		jbd_debug(5, "splicing direct\n");
-	}
-	return err;
-
-err_out:
-	for (i = 1; i <= num; i++) {
-		/*
-		 * branch[i].bh is newly allocated, so there is no
-		 * need to revoke the block, which is why we don't
-		 * need to set EXT4_FREE_BLOCKS_METADATA.
-		 */
-		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
-				 EXT4_FREE_BLOCKS_FORGET);
-	}
-	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
-			 blks, 0);
-
-	return err;
-}
-
-/*
- * The ext4_ind_map_blocks() function handles non-extents inodes
- * (i.e., using the traditional indirect/double-indirect i_blocks
- * scheme) for ext4_map_blocks().
- *
- * Allocation strategy is simple: if we have to allocate something, we will
- * have to go the whole way to leaf. So let's do it before attaching anything
- * to tree, set linkage between the newborn blocks, write them if sync is
- * required, recheck the path, free and repeat if check fails, otherwise
- * set the last missing link (that will protect us from any truncate-generated
- * removals - all blocks on the path are immune now) and possibly force the
- * write on the parent block.
- * That has a nice additional property: no special recovery from the failed
- * allocations is needed - we simply release blocks and do not touch anything
- * reachable from inode.
- *
- * `handle' can be NULL if create == 0.
- *
- * return > 0, # of blocks mapped or allocated.
- * return = 0, if plain lookup failed.
- * return < 0, error case.
- *
- * The ext4_ind_get_blocks() function should be called with
- * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
- * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
- * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
- * blocks.
- */
-static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
-			       struct ext4_map_blocks *map,
-			       int flags)
-{
-	int err = -EIO;
-	ext4_lblk_t offsets[4];
-	Indirect chain[4];
-	Indirect *partial;
-	ext4_fsblk_t goal;
-	int indirect_blks;
-	int blocks_to_boundary = 0;
-	int depth;
-	int count = 0;
-	ext4_fsblk_t first_block = 0;
-
-	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
-	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
-	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
-	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
-				   &blocks_to_boundary);
-
-	if (depth == 0)
-		goto out;
-
-	partial = ext4_get_branch(inode, depth, offsets, chain, &err);
-
-	/* Simplest case - block found, no allocation needed */
-	if (!partial) {
-		first_block = le32_to_cpu(chain[depth - 1].key);
-		count++;
-		/*map more blocks*/
-		while (count < map->m_len && count <= blocks_to_boundary) {
-			ext4_fsblk_t blk;
-
-			blk = le32_to_cpu(*(chain[depth-1].p + count));
-
-			if (blk == first_block + count)
-				count++;
-			else
-				break;
-		}
-		goto got_it;
-	}
-
-	/* Next simple case - plain lookup or failed read of indirect block */
-	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
-		goto cleanup;
-
-	/*
-	 * Okay, we need to do block allocation.
-	*/
-	goal = ext4_find_goal(inode, map->m_lblk, partial);
-
-	/* the number of blocks need to allocate for [d,t]indirect blocks */
-	indirect_blks = (chain + depth) - partial - 1;
-
-	/*
-	 * Next look up the indirect map to count the total number of
-	 * direct blocks to allocate for this branch.
-	 */
-	count = ext4_blks_to_allocate(partial, indirect_blks,
-				      map->m_len, blocks_to_boundary);
-	/*
-	 * Block out ext4_truncate while we alter the tree
-	 */
-	err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks,
-				&count, goal,
-				offsets + (partial - chain), partial);
-
-	/*
-	 * The ext4_splice_branch call will free and forget any buffers
-	 * on the new chain if there is a failure, but that risks using
-	 * up transaction credits, especially for bitmaps where the
-	 * credits cannot be returned.  Can we handle this somehow?  We
-	 * may need to return -EAGAIN upwards in the worst case.  --sct
-	 */
-	if (!err)
-		err = ext4_splice_branch(handle, inode, map->m_lblk,
-					 partial, indirect_blks, count);
-	if (err)
-		goto cleanup;
-
-	map->m_flags |= EXT4_MAP_NEW;
-
-	ext4_update_inode_fsync_trans(handle, inode, 1);
-got_it:
-	map->m_flags |= EXT4_MAP_MAPPED;
-	map->m_pblk = le32_to_cpu(chain[depth-1].key);
-	map->m_len = count;
-	if (count > blocks_to_boundary)
-		map->m_flags |= EXT4_MAP_BOUNDARY;
-	err = count;
-	/* Clean up and exit */
-	partial = chain + depth - 1;	/* the whole chain */
-cleanup:
-	while (partial > chain) {
-		BUFFER_TRACE(partial->bh, "call brelse");
-		brelse(partial->bh);
-		partial--;
-	}
-out:
-	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
-				map->m_pblk, map->m_len, err);
-	return err;
-}
-
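
The three-way return convention above is what ext4_map_blocks() relies on; a hedged caller-side sketch (kernel context, not standalone):

/* Sketch of a caller honouring the convention documented above */
ret = ext4_ind_map_blocks(handle, inode, &map, flags);
if (ret > 0) {
	/* map.m_pblk .. map.m_pblk + ret - 1 are now mapped */
} else if (ret == 0) {
	/* plain lookup hit a hole and no allocation was asked for */
} else {
	/* error, e.g. -EIO from a corrupt indirect block */
}
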
 #ifdef CONFIG_QUOTA
 qsize_t *ext4_get_reserved_space(struct inode *inode)
 {
@@ -1071,33 +242,6 @@ qsize_t *ext4_get_reserved_space(struct inode *inode)
 }
 #endif
 
-/*
- * Calculate the number of metadata blocks that need to be reserved
- * to allocate a new block at @lblock for a non-extent-based file
- */
-static int ext4_indirect_calc_metadata_amount(struct inode *inode,
-					      sector_t lblock)
-{
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	sector_t dind_mask = ~((sector_t)EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1);
-	int blk_bits;
-
-	if (lblock < EXT4_NDIR_BLOCKS)
-		return 0;
-
-	lblock -= EXT4_NDIR_BLOCKS;
-
-	if (ei->i_da_metadata_calc_len &&
-	    (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
-		ei->i_da_metadata_calc_len++;
-		return 0;
-	}
-	ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
-	ei->i_da_metadata_calc_len = 1;
-	blk_bits = order_base_2(lblock);
-	return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
-}
-
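
The estimate bottoms out in order_base_2(). With 4 KiB blocks (EXT4_ADDR_PER_BLOCK_BITS == 10), a write at logical block 5000 gives 5000 - 12 = 4988, order_base_2(4988) == 13, so 13 / 10 + 1 == 2 metadata blocks are reserved (one indirect plus one double-indirect). A standalone sketch of the same arithmetic:

/* Userspace sketch of the estimate above (4 KiB blocks assumed:
 * 12 direct slots, 10 address bits per indirect block). */
static int sketch_ind_metadata(unsigned long lblock)
{
	int b = 0;

	if (lblock < 12)
		return 0;		/* direct slot: no metadata */
	lblock -= 12;
	while ((1UL << b) < lblock)	/* order_base_2() by hand */
		b++;
	return b / 10 + 1;		/* lblock 5000 -> 13/10 + 1 == 2 */
}
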
 /*
  * Calculate the number of metadata blocks that need to be reserved
  * to allocate a block located at @lblock
@@ -1107,7 +251,7 @@ static int ext4_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblock)
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		return ext4_ext_calc_metadata_amount(inode, lblock);
 
-	return ext4_indirect_calc_metadata_amount(inode, lblock);
+	return ext4_ind_calc_metadata_amount(inode, lblock);
 }
 
 /*
@@ -1589,16 +733,6 @@ static int do_journal_get_write_access(handle_t *handle,
 	return ret;
 }
 
-/*
- * Truncate blocks that were not used by write. We have to truncate the
- * pagecache as well so that corresponding buffers get properly unmapped.
- */
-static void ext4_truncate_failed_write(struct inode *inode)
-{
-	truncate_inode_pages(inode->i_mapping, inode->i_size);
-	ext4_truncate(inode);
-}
-
 static int ext4_get_block_write(struct inode *inode, sector_t iblock,
 		   struct buffer_head *bh_result, int create);
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
@@ -1863,6 +997,7 @@ static int ext4_journalled_write_end(struct file *file,
 	if (new_i_size > inode->i_size)
 		i_size_write(inode, pos+copied);
 	ext4_set_inode_state(inode, EXT4_STATE_JDATA);
+	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
 	if (new_i_size > EXT4_I(inode)->i_disksize) {
 		ext4_update_i_disksize(inode, new_i_size);
 		ret2 = ext4_mark_inode_dirty(handle, inode);
@@ -2571,6 +1706,7 @@ static int __ext4_journalled_writepage(struct page *page,
 				write_end_fn);
 	if (ret == 0)
 		ret = err;
+	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
 	err = ext4_journal_stop(handle);
 	if (!ret)
 		ret = err;
@@ -3449,112 +2585,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 		return try_to_free_buffers(page);
 }
 
-/*
- * O_DIRECT for ext3 (or indirect map) based files
- *
- * If the O_DIRECT write will extend the file then add this inode to the
- * orphan list.  So recovery will truncate it back to the original size
- * if the machine crashes during the write.
- *
- * If the O_DIRECT write is instantiating holes inside i_size and the machine
- * crashes then stale disk data _may_ be exposed inside the file. But current
- * VFS code falls back into buffered path in that case so we are safe.
- */
-static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
-{
-	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	handle_t *handle;
-	ssize_t ret;
-	int orphan = 0;
-	size_t count = iov_length(iov, nr_segs);
-	int retries = 0;
-
-	if (rw == WRITE) {
-		loff_t final_size = offset + count;
-
-		if (final_size > inode->i_size) {
-			/* Credits for sb + inode write */
-			handle = ext4_journal_start(inode, 2);
-			if (IS_ERR(handle)) {
-				ret = PTR_ERR(handle);
-				goto out;
-			}
-			ret = ext4_orphan_add(handle, inode);
-			if (ret) {
-				ext4_journal_stop(handle);
-				goto out;
-			}
-			orphan = 1;
-			ei->i_disksize = inode->i_size;
-			ext4_journal_stop(handle);
-		}
-	}
-
-retry:
-	if (rw == READ && ext4_should_dioread_nolock(inode))
-		ret = __blockdev_direct_IO(rw, iocb, inode,
-				 inode->i_sb->s_bdev, iov,
-				 offset, nr_segs,
-				 ext4_get_block, NULL, NULL, 0);
-	else {
-		ret = blockdev_direct_IO(rw, iocb, inode, iov,
-				 offset, nr_segs, ext4_get_block);
-
-		if (unlikely((rw & WRITE) && ret < 0)) {
-			loff_t isize = i_size_read(inode);
-			loff_t end = offset + iov_length(iov, nr_segs);
-
-			if (end > isize)
-				ext4_truncate_failed_write(inode);
-		}
-	}
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-		goto retry;
-
-	if (orphan) {
-		int err;
-
-		/* Credits for sb + inode write */
-		handle = ext4_journal_start(inode, 2);
-		if (IS_ERR(handle)) {
-			/* This is really bad luck. We've written the data
-			 * but cannot extend i_size. Bail out and pretend
-			 * the write failed... */
-			ret = PTR_ERR(handle);
-			if (inode->i_nlink)
-				ext4_orphan_del(NULL, inode);
-
-			goto out;
-		}
-		if (inode->i_nlink)
-			ext4_orphan_del(handle, inode);
-		if (ret > 0) {
-			loff_t end = offset + ret;
-			if (end > inode->i_size) {
-				ei->i_disksize = end;
-				i_size_write(inode, end);
-				/*
-				 * We're going to return a positive `ret'
-				 * here due to non-zero-length I/O, so there's
-				 * no way of reporting error returns from
-				 * ext4_mark_inode_dirty() to userspace.  So
-				 * ignore it.
-				 */
-				ext4_mark_inode_dirty(handle, inode);
-			}
-		}
-		err = ext4_journal_stop(handle);
-		if (ret == 0)
-			ret = err;
-	}
-out:
-	return ret;
-}
-
 /*
  * ext4_get_block used when preparing for a DIO write or buffer write.
  * We allocate an uninitialized extent if blocks haven't been allocated.
@@ -4033,383 +3063,6 @@ unlock:
 	return err;
 }
 
-/*
- * Probably it should be a library function... search for first non-zero word
- * or memcmp with zero_page, whatever is better for particular architecture.
- * Linus?
- */
-static inline int all_zeroes(__le32 *p, __le32 *q)
-{
-	while (p < q)
-		if (*p++)
-			return 0;
-	return 1;
-}
-
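
As the comment muses, this could be a library routine; one hedged alternative is a memcmp() against a static zero buffer, at the cost of extra memory traffic compared with the word scan above:

#include <string.h>

/* Sketch only: the memcmp-with-zeroes variant the comment above
 * suggests.  The buffer size is an assumption; the callers here
 * never scan more than one fs block. */
static int all_zeroes_memcmp(const void *p, size_t len)
{
	static const unsigned char zeroes[4096];

	return len <= sizeof(zeroes) && !memcmp(p, zeroes, len);
}
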
-/**
- *	ext4_find_shared - find the indirect blocks for partial truncation.
- *	@inode:	  inode in question
- *	@depth:	  depth of the affected branch
- *	@offsets: offsets of pointers in that branch (see ext4_block_to_path)
- *	@chain:	  place to store the pointers to partial indirect blocks
- *	@top:	  place to the (detached) top of branch
- *
- *	This is a helper function used by ext4_truncate().
- *
- *	When we do truncate() we may have to clean the ends of several
- *	indirect blocks but leave the blocks themselves alive. Block is
- *	partially truncated if some data below the new i_size is referred
- *	from it (and it is on the path to the first completely truncated
- *	data block, indeed).  We have to free the top of that path along
- *	with everything to the right of the path. Since no allocation
- *	past the truncation point is possible until ext4_truncate()
- *	finishes, we may safely do the latter, but top of branch may
- *	require special attention - pageout below the truncation point
- *	might try to populate it.
- *
- *	We atomically detach the top of branch from the tree, store the
- *	block number of its root in *@top, pointers to buffer_heads of
- *	partially truncated blocks - in @chain[].bh and pointers to
- *	their last elements that should not be removed - in
- *	@chain[].p. Return value is the pointer to last filled element
- *	of @chain.
- *
- *	The work left to caller to do the actual freeing of subtrees:
- *		a) free the subtree starting from *@top
- *		b) free the subtrees whose roots are stored in
- *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
- *		c) free the subtrees growing from the inode past the @chain[0].
- *			(no partially truncated stuff there).  */
-
-static Indirect *ext4_find_shared(struct inode *inode, int depth,
-				  ext4_lblk_t offsets[4], Indirect chain[4],
-				  __le32 *top)
-{
-	Indirect *partial, *p;
-	int k, err;
-
-	*top = 0;
-	/* Make k index the deepest non-null offset + 1 */
-	for (k = depth; k > 1 && !offsets[k-1]; k--)
-		;
-	partial = ext4_get_branch(inode, k, offsets, chain, &err);
-	/* Writer: pointers */
-	if (!partial)
-		partial = chain + k-1;
-	/*
-	 * If the branch acquired continuation since we've looked at it -
-	 * fine, it should all survive and (new) top doesn't belong to us.
-	 */
-	if (!partial->key && *partial->p)
-		/* Writer: end */
-		goto no_top;
-	for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--)
-		;
-	/*
-	 * OK, we've found the last block that must survive. The rest of our
-	 * branch should be detached before unlocking. However, if that rest
-	 * of branch is all ours and does not grow immediately from the inode
-	 * it's easier to cheat and just decrement partial->p.
-	 */
-	if (p == chain + k - 1 && p > chain) {
-		p->p--;
-	} else {
-		*top = *p->p;
-		/* Nope, don't do this in ext4.  Must leave the tree intact */
-#if 0
-		*p->p = 0;
-#endif
-	}
-	/* Writer: end */
-
-	while (partial > p) {
-		brelse(partial->bh);
-		partial--;
-	}
-no_top:
-	return partial;
-}
-
-/*
- * Zero a number of block pointers in either an inode or an indirect block.
- * If we restart the transaction we must again get write access to the
- * indirect block for further modification.
- *
- * We release `count' blocks on disk, but (last - first) may be greater
- * than `count' because there can be holes in there.
- *
- * Return 0 on success, 1 on invalid block range
- * and < 0 on fatal error.
- */
-static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
-			     struct buffer_head *bh,
-			     ext4_fsblk_t block_to_free,
-			     unsigned long count, __le32 *first,
-			     __le32 *last)
-{
-	__le32 *p;
-	int	flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED;
-	int	err;
-
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
-
-	if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
-				   count)) {
-		EXT4_ERROR_INODE(inode, "attempt to clear invalid "
-				 "blocks %llu len %lu",
-				 (unsigned long long) block_to_free, count);
-		return 1;
-	}
-
-	if (try_to_extend_transaction(handle, inode)) {
-		if (bh) {
-			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-			err = ext4_handle_dirty_metadata(handle, inode, bh);
-			if (unlikely(err))
-				goto out_err;
-		}
-		err = ext4_mark_inode_dirty(handle, inode);
-		if (unlikely(err))
-			goto out_err;
-		err = ext4_truncate_restart_trans(handle, inode,
-						  blocks_for_truncate(inode));
-		if (unlikely(err))
-			goto out_err;
-		if (bh) {
-			BUFFER_TRACE(bh, "retaking write access");
-			err = ext4_journal_get_write_access(handle, bh);
-			if (unlikely(err))
-				goto out_err;
-		}
-	}
-
-	for (p = first; p < last; p++)
-		*p = 0;
-
-	ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
-	return 0;
-out_err:
-	ext4_std_error(inode->i_sb, err);
-	return err;
-}
-
-/**
- * ext4_free_data - free a list of data blocks
- * @handle:	handle for this transaction
- * @inode:	inode we are dealing with
- * @this_bh:	indirect buffer_head which contains *@first and *@last
- * @first:	array of block numbers
- * @last:	points immediately past the end of array
- *
- * We are freeing all blocks referred from that array (numbers are stored as
- * little-endian 32-bit) and updating @inode->i_blocks appropriately.
- *
- * We accumulate contiguous runs of blocks to free.  Conveniently, if these
- * blocks are contiguous then releasing them at one time will only affect one
- * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't
- * actually use a lot of journal space.
- *
- * @this_bh will be %NULL if @first and @last point into the inode's direct
- * block pointers.
- */
-static void ext4_free_data(handle_t *handle, struct inode *inode,
-			   struct buffer_head *this_bh,
-			   __le32 *first, __le32 *last)
-{
-	ext4_fsblk_t block_to_free = 0;    /* Starting block # of a run */
-	unsigned long count = 0;	    /* Number of blocks in the run */
-	__le32 *block_to_free_p = NULL;	    /* Pointer into inode/ind
-					       corresponding to
-					       block_to_free */
-	ext4_fsblk_t nr;		    /* Current block # */
-	__le32 *p;			    /* Pointer into inode/ind
-					       for current block */
-	int err = 0;
-
-	if (this_bh) {				/* For indirect block */
-		BUFFER_TRACE(this_bh, "get_write_access");
-		err = ext4_journal_get_write_access(handle, this_bh);
-		/* Important: if we can't update the indirect pointers
-		 * to the blocks, we can't free them. */
-		if (err)
-			return;
-	}
-
-	for (p = first; p < last; p++) {
-		nr = le32_to_cpu(*p);
-		if (nr) {
-			/* accumulate blocks to free if they're contiguous */
-			if (count == 0) {
-				block_to_free = nr;
-				block_to_free_p = p;
-				count = 1;
-			} else if (nr == block_to_free + count) {
-				count++;
-			} else {
-				err = ext4_clear_blocks(handle, inode, this_bh,
-						        block_to_free, count,
-						        block_to_free_p, p);
-				if (err)
-					break;
-				block_to_free = nr;
-				block_to_free_p = p;
-				count = 1;
-			}
-		}
-	}
-
-	if (!err && count > 0)
-		err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-					count, block_to_free_p, p);
-	if (err < 0)
-		/* fatal error */
-		return;
-
-	if (this_bh) {
-		BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
-
-		/*
-		 * The buffer head should have an attached journal head at this
-		 * point. However, if the data is corrupted and an indirect
-		 * block pointed to itself, it would have been detached when
-		 * the block was cleared. Check for this instead of OOPSing.
-		 */
-		if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
-			ext4_handle_dirty_metadata(handle, inode, this_bh);
-		else
-			EXT4_ERROR_INODE(inode,
-					 "circular indirect block detected at "
-					 "block %llu",
-				(unsigned long long) this_bh->b_blocknr);
-	}
-}
-
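
The accumulation logic is worth isolating: contiguous block numbers are batched so one ext4_free_blocks() call covers a whole run, a zero pointer is a hole, and a discontinuity flushes the current run. A userspace illustration with made-up block numbers:

#include <stdio.h>

/* Userspace illustration of the run batching in ext4_free_data():
 * contiguous numbers extend the run, zero is skipped as a hole,
 * anything else flushes the run and starts a new one. */
int main(void)
{
	unsigned blocks[] = { 100, 101, 102, 0, 200, 201 };
	unsigned start = 0, count = 0, i;

	for (i = 0; i < 6; i++) {
		unsigned nr = blocks[i];

		if (!nr)
			continue;			/* a hole */
		if (count && nr == start + count) {
			count++;			/* extend the run */
		} else {
			if (count)
				printf("free %u..%u\n",
				       start, start + count - 1);
			start = nr;
			count = 1;
		}
	}
	if (count)
		printf("free %u..%u\n", start, start + count - 1);
	return 0;	/* prints "free 100..102" then "free 200..201" */
}
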
-/**
- *	ext4_free_branches - free an array of branches
- *	@handle: JBD handle for this transaction
- *	@inode:	inode we are dealing with
- *	@parent_bh: the buffer_head which contains *@first and *@last
- *	@first:	array of block numbers
- *	@last:	pointer immediately past the end of array
- *	@depth:	depth of the branches to free
- *
- *	We are freeing all blocks referred from these branches (numbers are
- *	stored as little-endian 32-bit) and updating @inode->i_blocks
- *	appropriately.
- */
-static void ext4_free_branches(handle_t *handle, struct inode *inode,
-			       struct buffer_head *parent_bh,
-			       __le32 *first, __le32 *last, int depth)
-{
-	ext4_fsblk_t nr;
-	__le32 *p;
-
-	if (ext4_handle_is_aborted(handle))
-		return;
-
-	if (depth--) {
-		struct buffer_head *bh;
-		int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-		p = last;
-		while (--p >= first) {
-			nr = le32_to_cpu(*p);
-			if (!nr)
-				continue;		/* A hole */
-
-			if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
-						   nr, 1)) {
-				EXT4_ERROR_INODE(inode,
-						 "invalid indirect mapped "
-						 "block %lu (level %d)",
-						 (unsigned long) nr, depth);
-				break;
-			}
-
-			/* Go read the buffer for the next level down */
-			bh = sb_bread(inode->i_sb, nr);
-
-			/*
-			 * A read failure? Report error and clear slot
-			 * (should be rare).
-			 */
-			if (!bh) {
-				EXT4_ERROR_INODE_BLOCK(inode, nr,
-						       "Read failure");
-				continue;
-			}
-
-			/* This zaps the entire block.  Bottom up. */
-			BUFFER_TRACE(bh, "free child branches");
-			ext4_free_branches(handle, inode, bh,
-					(__le32 *) bh->b_data,
-					(__le32 *) bh->b_data + addr_per_block,
-					depth);
-			brelse(bh);
-
-			/*
-			 * Everything below this pointer has been
-			 * released.  Now let this top-of-subtree go.
-			 *
-			 * We want the freeing of this indirect block to be
-			 * atomic in the journal with the updating of the
-			 * bitmap block which owns it.  So make some room in
-			 * the journal.
-			 *
-			 * We zero the parent pointer *after* freeing its
-			 * pointee in the bitmaps, so if extend_transaction()
-			 * for some reason fails to put the bitmap changes and
-			 * the release into the same transaction, recovery
-			 * will merely complain about releasing a free block,
-			 * rather than leaking blocks.
-			 */
-			if (ext4_handle_is_aborted(handle))
-				return;
-			if (try_to_extend_transaction(handle, inode)) {
-				ext4_mark_inode_dirty(handle, inode);
-				ext4_truncate_restart_trans(handle, inode,
-					    blocks_for_truncate(inode));
-			}
-
-			/*
-			 * The forget flag here is critical because if
-			 * we are journaling (and not doing data
-			 * journaling), we have to make sure a revoke
-			 * record is written to prevent the journal
-			 * replay from overwriting the (former)
-			 * indirect block if it gets reallocated as a
-			 * data block.  This must happen in the same
-			 * transaction where the data blocks are
-			 * actually freed.
-			 */
-			ext4_free_blocks(handle, inode, NULL, nr, 1,
-					 EXT4_FREE_BLOCKS_METADATA|
-					 EXT4_FREE_BLOCKS_FORGET);
-
-			if (parent_bh) {
-				/*
-				 * The block which we have just freed is
-				 * pointed to by an indirect block: journal it
-				 */
-				BUFFER_TRACE(parent_bh, "get_write_access");
-				if (!ext4_journal_get_write_access(handle,
-								   parent_bh)){
-					*p = 0;
-					BUFFER_TRACE(parent_bh,
-					"call ext4_handle_dirty_metadata");
-					ext4_handle_dirty_metadata(handle,
-								   inode,
-								   parent_bh);
-				}
-			}
-		}
-	} else {
-		/* We have reached the bottom of the tree. */
-		BUFFER_TRACE(parent_bh, "free data blocks");
-		ext4_free_data(handle, inode, parent_bh, first, last);
-	}
-}
-
 int ext4_can_truncate(struct inode *inode)
 {
 	if (S_ISREG(inode->i_mode))
@@ -4476,19 +3129,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
  */
 void ext4_truncate(struct inode *inode)
 {
-	handle_t *handle;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	__le32 *i_data = ei->i_data;
-	int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
-	struct address_space *mapping = inode->i_mapping;
-	ext4_lblk_t offsets[4];
-	Indirect chain[4];
-	Indirect *partial;
-	__le32 nr = 0;
-	int n = 0;
-	ext4_lblk_t last_block, max_block;
-	unsigned blocksize = inode->i_sb->s_blocksize;
-
 	trace_ext4_truncate_enter(inode);
 
 	if (!ext4_can_truncate(inode))
@@ -4499,149 +3139,11 @@ void ext4_truncate(struct inode *inode)
 	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
 		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
-	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		ext4_ext_truncate(inode);
-		trace_ext4_truncate_exit(inode);
-		return;
-	}
-
-	handle = start_transaction(inode);
-	if (IS_ERR(handle))
-		return;		/* AKPM: return what? */
-
-	last_block = (inode->i_size + blocksize-1)
-					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
-	max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
-					>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
-
-	if (inode->i_size & (blocksize - 1))
-		if (ext4_block_truncate_page(handle, mapping, inode->i_size))
-			goto out_stop;
-
-	if (last_block != max_block) {
-		n = ext4_block_to_path(inode, last_block, offsets, NULL);
-		if (n == 0)
-			goto out_stop;	/* error */
-	}
-
-	/*
-	 * OK.  This truncate is going to happen.  We add the inode to the
-	 * orphan list, so that if this truncate spans multiple transactions,
-	 * and we crash, we will resume the truncate when the filesystem
-	 * recovers.  It also marks the inode dirty, to catch the new size.
-	 *
-	 * Implication: the file must always be in a sane, consistent
-	 * truncatable state while each transaction commits.
-	 */
-	if (ext4_orphan_add(handle, inode))
-		goto out_stop;
-
-	/*
-	 * From here we block out all ext4_get_block() callers who want to
-	 * modify the block allocation tree.
-	 */
-	down_write(&ei->i_data_sem);
-
-	ext4_discard_preallocations(inode);
-
-	/*
-	 * The orphan list entry will now protect us from any crash which
-	 * occurs before the truncate completes, so it is now safe to propagate
-	 * the new, shorter inode size (held for now in i_size) into the
-	 * on-disk inode. We do this via i_disksize, which is the value which
-	 * ext4 *really* writes onto the disk inode.
-	 */
-	ei->i_disksize = inode->i_size;
-
-	if (last_block == max_block) {
-		/*
-		 * It is unnecessary to free any data blocks if last_block is
-		 * equal to the indirect block limit.
-		 */
-		goto out_unlock;
-	} else if (n == 1) {		/* direct blocks */
-		ext4_free_data(handle, inode, NULL, i_data+offsets[0],
-			       i_data + EXT4_NDIR_BLOCKS);
-		goto do_indirects;
-	}
-
-	partial = ext4_find_shared(inode, n, offsets, chain, &nr);
-	/* Kill the top of shared branch (not detached) */
-	if (nr) {
-		if (partial == chain) {
-			/* Shared branch grows from the inode */
-			ext4_free_branches(handle, inode, NULL,
-					   &nr, &nr+1, (chain+n-1) - partial);
-			*partial->p = 0;
-			/*
-			 * We mark the inode dirty prior to restart,
-			 * and prior to stop.  No need for it here.
-			 */
-		} else {
-			/* Shared branch grows from an indirect block */
-			BUFFER_TRACE(partial->bh, "get_write_access");
-			ext4_free_branches(handle, inode, partial->bh,
-					partial->p,
-					partial->p+1, (chain+n-1) - partial);
-		}
-	}
-	/* Clear the ends of indirect blocks on the shared branch */
-	while (partial > chain) {
-		ext4_free_branches(handle, inode, partial->bh, partial->p + 1,
-				   (__le32*)partial->bh->b_data+addr_per_block,
-				   (chain+n-1) - partial);
-		BUFFER_TRACE(partial->bh, "call brelse");
-		brelse(partial->bh);
-		partial--;
-	}
-do_indirects:
-	/* Kill the remaining (whole) subtrees */
-	switch (offsets[0]) {
-	default:
-		nr = i_data[EXT4_IND_BLOCK];
-		if (nr) {
-			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
-			i_data[EXT4_IND_BLOCK] = 0;
-		}
-	case EXT4_IND_BLOCK:
-		nr = i_data[EXT4_DIND_BLOCK];
-		if (nr) {
-			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
-			i_data[EXT4_DIND_BLOCK] = 0;
-		}
-	case EXT4_DIND_BLOCK:
-		nr = i_data[EXT4_TIND_BLOCK];
-		if (nr) {
-			ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
-			i_data[EXT4_TIND_BLOCK] = 0;
-		}
-	case EXT4_TIND_BLOCK:
-		;
-	}
-
-out_unlock:
-	up_write(&ei->i_data_sem);
-	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
-	ext4_mark_inode_dirty(handle, inode);
-
-	/*
-	 * In a multi-transaction truncate, we only make the final transaction
-	 * synchronous
-	 */
-	if (IS_SYNC(inode))
-		ext4_handle_sync(handle);
-out_stop:
-	/*
-	 * If this was a simple ftruncate(), and the file will remain alive
-	 * then we need to clear up the orphan record which we created above.
-	 * However, if this was a real unlink then we were called by
-	 * ext4_delete_inode(), and we allow that function to clean up the
-	 * orphan info for us.
-	 */
-	if (inode->i_nlink)
-		ext4_orphan_del(handle, inode);
+	else
+		ext4_ind_truncate(inode);
 
-	ext4_journal_stop(handle);
 	trace_ext4_truncate_exit(inode);
 }
 
@@ -5012,7 +3514,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
 		   (S_ISLNK(inode->i_mode) &&
 		    !ext4_inode_is_fast_symlink(inode))) {
 		/* Validate block references which are part of inode */
-		ret = ext4_check_inode_blockref(inode);
+		ret = ext4_ind_check_inode(inode);
 	}
 	if (ret)
 		goto bad_inode;
@@ -5459,34 +3961,10 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	return 0;
 }
 
-static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
-				      int chunk)
-{
-	int indirects;
-
-	/* if nrblocks are contiguous */
-	if (chunk) {
-		/*
-		 * With N contiguous data blocks, we need at most
-		 * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
-		 * 2 dindirect blocks, and 1 tindirect block
-		 */
-		return DIV_ROUND_UP(nrblocks,
-				    EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
-	}
-	/*
-	 * if nrblocks are not contiguous, worst case, each block touches
-	 * an indirect block, and each indirect block touches a double indirect
-	 * block, plus a triple indirect block
-	 */
-	indirects = nrblocks * 2 + 1;
-	return indirects;
-}
-
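
The contiguous-case bound removed above is easy to sanity-check. With 4 KiB blocks (1024 pointers per indirect block), N contiguous data blocks can touch at most N/1024 + 1 indirect blocks, 2 double-indirect blocks and 1 triple-indirect block; a sketch assuming that geometry:

/* Sketch of the contiguous-case bound above, assuming 4 KiB blocks
 * (1024 pointers per indirect block). */
static int contig_trans_blocks(int nrblocks)
{
	/* DIV_ROUND_UP(nrblocks, 1024) + 4; e.g. 2048 blocks -> 6 */
	return (nrblocks + 1023) / 1024 + 4;
}
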
 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-		return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
+		return ext4_ind_trans_blocks(inode, nrblocks, chunk);
 	return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
 }
 

+ 8 - 4
fs/ext4/ioctl.c

@@ -202,8 +202,9 @@ setversion_out:
 		struct super_block *sb = inode->i_sb;
 		int err, err2=0;
 
-		if (!capable(CAP_SYS_RESOURCE))
-			return -EPERM;
+		err = ext4_resize_begin(sb);
+		if (err)
+			return err;
 
 		if (get_user(n_blocks_count, (__u32 __user *)arg))
 			return -EFAULT;
@@ -221,6 +222,7 @@ setversion_out:
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
+		ext4_resize_end(sb);
 
 		return err;
 	}
@@ -271,8 +273,9 @@ mext_out:
 		struct super_block *sb = inode->i_sb;
 		int err, err2=0;
 
-		if (!capable(CAP_SYS_RESOURCE))
-			return -EPERM;
+		err = ext4_resize_begin(sb);
+		if (err)
+			return err;
 
 		if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
 				sizeof(input)))
@@ -291,6 +294,7 @@ mext_out:
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
+		ext4_resize_end(sb);
 
 		return err;
 	}

+ 146 - 84
fs/ext4/mballoc.c

@@ -75,8 +75,8 @@
  *
  * The inode preallocation space is used looking at the _logical_ start
  * block. If only the logical file block falls within the range of prealloc
- * space we will consume the particular prealloc space. This make sure that
- * that the we have contiguous physical blocks representing the file blocks
+ * space we will consume the particular prealloc space. This makes sure that
+ * we have contiguous physical blocks representing the file blocks
  *
  * The important thing to be noted in case of inode prealloc space is that
  * we don't modify the values associated to inode prealloc space except
@@ -84,7 +84,7 @@
  *
  * If we are not able to find blocks in the inode prealloc space and if we
  * have the group allocation flag set then we look at the locality group
- * prealloc space. These are per CPU prealloc list repreasented as
+ * prealloc space. These are per CPU prealloc list represented as
  *
  * ext4_sb_info.s_locality_groups[smp_processor_id()]
  *
@@ -128,12 +128,13 @@
  * we are doing a group prealloc we try to normalize the request to
  * sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
  * 512 blocks. This can be tuned via
- * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in
+ * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
  * terms of number of blocks. If we have mounted the file system with -O
  * stripe=<value> option the group prealloc request is normalized to the
- * stripe value (sbi->s_stripe)
+ * smallest multiple of the stripe value (sbi->s_stripe) which is
+ * greater than the default mb_group_prealloc.
  *
  *
- * The regular allocator(using the buddy cache) supports few tunables.
+ * The regular allocator (using the buddy cache) supports a few tunables.
  *
  *
  * /sys/fs/ext4/<partition>/mb_min_to_scan
  * /sys/fs/ext4/<partition>/mb_min_to_scan
  * /sys/fs/ext4/<partition>/mb_max_to_scan
  * /sys/fs/ext4/<partition>/mb_max_to_scan
@@ -152,7 +153,7 @@
  * best extent in the found extents. Searching for the blocks starts with
  * the group specified as the goal value in allocation context via
  * ac_g_ex. Each group is first checked based on the criteria whether it
- * can used for allocation. ext4_mb_good_group explains how the groups are
+ * can be used for allocation. ext4_mb_good_group explains how the groups are
  * checked.
  *
  * Both the prealloc space are getting populated as above. So for the first
@@ -492,10 +493,11 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap)
 		b2 = (unsigned char *) bitmap;
 		for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
 			if (b1[i] != b2[i]) {
-				printk(KERN_ERR "corruption in group %u "
-				       "at byte %u(%u): %x in copy != %x "
-				       "on disk/prealloc\n",
-				       e4b->bd_group, i, i * 8, b1[i], b2[i]);
+				ext4_msg(e4b->bd_sb, KERN_ERR,
+					 "corruption in group %u "
+					 "at byte %u(%u): %x in copy != %x "
+					 "on disk/prealloc",
+					 e4b->bd_group, i, i * 8, b1[i], b2[i]);
 				BUG();
 			}
 		}
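Most of the remaining hunks in this file replace bare printk() calls with ext4_msg(), which tags every message with the filesystem's device name and drops the trailing "\n" from the format strings. A rough userspace analogue of such a wrapper (the signature and names are made up for illustration; the real ext4_msg() takes a struct super_block):

#include <stdarg.h>
#include <stdio.h>

static void fs_msg(const char *dev, const char *level, const char *fmt, ...)
{
	va_list args;

	/* Every message gets a uniform subsystem + device prefix. */
	fprintf(stderr, "%sEXT4-fs (%s): ", level, dev);
	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
	fputc('\n', stderr);	/* callers no longer append "\n" themselves */
}

int main(void)
{
	fs_msg("sda1", "<3>", "corruption in group %u at byte %u", 5u, 42u);
	return 0;
}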
@@ -1125,7 +1127,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	grp = ext4_get_group_info(sb, group);
 
 	e4b->bd_blkbits = sb->s_blocksize_bits;
-	e4b->bd_info = ext4_get_group_info(sb, group);
+	e4b->bd_info = grp;
 	e4b->bd_sb = sb;
 	e4b->bd_group = group;
 	e4b->bd_buddy_page = NULL;
@@ -1281,7 +1283,7 @@ static void mb_clear_bits(void *bm, int cur, int len)
 	}
 }
 
-static void mb_set_bits(void *bm, int cur, int len)
+void ext4_set_bits(void *bm, int cur, int len)
 {
 	__u32 *addr;
 
@@ -1510,7 +1512,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
 	}
 	mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
-	mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+	ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
 	mb_check_buddy(e4b);
 
 	return ret;
@@ -2223,8 +2225,8 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		meta_group_info = kmalloc(metalen, GFP_KERNEL);
 		if (meta_group_info == NULL) {
-			printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
-			       "buddy group\n");
+			ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
+				 "for a buddy group");
 			goto exit_meta_group_info;
 		}
 		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
@@ -2237,7 +2239,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 
 	meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
 	if (meta_group_info[i] == NULL) {
-		printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+		ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
 		goto exit_group_info;
 	}
 	memset(meta_group_info[i], 0, kmem_cache_size(cachep));
@@ -2279,8 +2281,10 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 
 exit_group_info:
 	/* If a meta_group_info table has been allocated, release it now */
-	if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+	if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
 		kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
+	}
 exit_meta_group_info:
 	return -ENOMEM;
 } /* ext4_mb_add_groupinfo */
@@ -2328,23 +2332,26 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
 	 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
 	 * So a two level scheme suffices for now. */
-	sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
+	sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
 	if (sbi->s_group_info == NULL) {
-		printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
+		ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
 		return -ENOMEM;
 	}
 	sbi->s_buddy_cache = new_inode(sb);
 	if (sbi->s_buddy_cache == NULL) {
-		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
+		ext4_msg(sb, KERN_ERR, "can't get new inode");
 		goto err_freesgi;
 	}
-	sbi->s_buddy_cache->i_ino = get_next_ino();
+	/* To avoid potentially colliding with an valid on-disk inode number,
+	 * use EXT4_BAD_INO for the buddy cache inode number.  This inode is
+	 * not in the inode hash, so it should never be found by iget(), but
+	 * this will avoid confusion if it ever shows up during debugging. */
+	sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
 	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
 	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
 		if (desc == NULL) {
-			printk(KERN_ERR
-				"EXT4-fs: can't read descriptor %u\n", i);
+			ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
 			goto err_freebuddy;
 		}
 		if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
@@ -2362,7 +2369,7 @@ err_freebuddy:
 		kfree(sbi->s_group_info[i]);
 	iput(sbi->s_buddy_cache);
 err_freesgi:
-	kfree(sbi->s_group_info);
+	ext4_kvfree(sbi->s_group_info);
 	return -ENOMEM;
 }
 
@@ -2404,14 +2411,15 @@ static int ext4_groupinfo_create_slab(size_t size)
 					slab_size, 0, SLAB_RECLAIM_ACCOUNT,
 					NULL);
 
+	ext4_groupinfo_caches[cache_index] = cachep;
+
 	mutex_unlock(&ext4_grpinfo_slab_create_mutex);
 	if (!cachep) {
-		printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+		printk(KERN_EMERG
+		       "EXT4-fs: no memory for groupinfo slab cache\n");
 		return -ENOMEM;
 	}
 
-	ext4_groupinfo_caches[cache_index] = cachep;
-
 	return 0;
 }
 
@@ -2457,12 +2465,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		i++;
 	} while (i <= sb->s_blocksize_bits + 1);
 
-	/* init file for buddy data */
-	ret = ext4_mb_init_backend(sb);
-	if (ret != 0) {
-		goto out;
-	}
-
 	spin_lock_init(&sbi->s_md_lock);
 	spin_lock_init(&sbi->s_bal_lock);
 
@@ -2472,6 +2474,18 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
 	sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
 	sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+	/*
+	 * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
+	 * to the lowest multiple of s_stripe which is bigger than
+	 * the s_mb_group_prealloc as determined above. We want
+	 * the preallocation size to be an exact multiple of the
+	 * RAID stripe size so that preallocations don't fragment
+	 * the stripes.
+	 */
+	if (sbi->s_stripe > 1) {
+		sbi->s_mb_group_prealloc = roundup(
+			sbi->s_mb_group_prealloc, sbi->s_stripe);
+	}
 
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
@@ -2487,6 +2501,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 		spin_lock_init(&lg->lg_prealloc_lock);
 	}
 
+	/* init file for buddy data */
+	ret = ext4_mb_init_backend(sb);
+	if (ret != 0) {
+		goto out;
+	}
+
 	if (sbi->s_proc)
 		proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
 				 &ext4_mb_seq_groups_fops, sb);
@@ -2544,32 +2564,32 @@ int ext4_mb_release(struct super_block *sb)
 			EXT4_DESC_PER_BLOCK_BITS(sb);
 		for (i = 0; i < num_meta_group_infos; i++)
 			kfree(sbi->s_group_info[i]);
-		kfree(sbi->s_group_info);
+		ext4_kvfree(sbi->s_group_info);
 	}
 	kfree(sbi->s_mb_offsets);
 	kfree(sbi->s_mb_maxs);
 	if (sbi->s_buddy_cache)
 		iput(sbi->s_buddy_cache);
 	if (sbi->s_mb_stats) {
-		printk(KERN_INFO
-		       "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
+		ext4_msg(sb, KERN_INFO,
+		       "mballoc: %u blocks %u reqs (%u success)",
 				atomic_read(&sbi->s_bal_allocated),
 				atomic_read(&sbi->s_bal_reqs),
 				atomic_read(&sbi->s_bal_success));
-		printk(KERN_INFO
-		      "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
-				"%u 2^N hits, %u breaks, %u lost\n",
+		ext4_msg(sb, KERN_INFO,
+		      "mballoc: %u extents scanned, %u goal hits, "
+				"%u 2^N hits, %u breaks, %u lost",
 				atomic_read(&sbi->s_bal_ex_scanned),
 				atomic_read(&sbi->s_bal_goals),
 				atomic_read(&sbi->s_bal_2orders),
 				atomic_read(&sbi->s_bal_breaks),
 				atomic_read(&sbi->s_mb_lost_chunks));
-		printk(KERN_INFO
-		       "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
-				sbi->s_mb_buddies_generated++,
+		ext4_msg(sb, KERN_INFO,
+		       "mballoc: %lu generated and it took %Lu",
+				sbi->s_mb_buddies_generated,
 				sbi->s_mb_generation_time);
-		printk(KERN_INFO
-		       "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
+		ext4_msg(sb, KERN_INFO,
+		       "mballoc: %u preallocated, %u discarded",
 				atomic_read(&sbi->s_mb_preallocated),
 				atomic_read(&sbi->s_mb_discarded));
 	}
@@ -2628,6 +2648,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 		rb_erase(&entry->node, &(db->bb_free_root));
 		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
 
+		/*
+		 * Clear the trimmed flag for the group so that the next
+		 * ext4_trim_fs can trim it.
+		 * If the volume is mounted with -o discard, online discard
+		 * is supported and the free blocks will be trimmed online.
+		 */
+		if (!test_opt(sb, DISCARD))
+			EXT4_MB_GRP_CLEAR_TRIMMED(db);
+
 		if (!db->bb_free_root.rb_node) {
 			/* No more items in the per group rb tree
 			 * balance refcounts from ext4_mb_free_metadata()
@@ -2771,8 +2800,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		 * We leak some of the blocks here.
 		 */
 		ext4_lock_group(sb, ac->ac_b_ex.fe_group);
-		mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
-			    ac->ac_b_ex.fe_len);
+		ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+			      ac->ac_b_ex.fe_len);
 		ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
 		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
 		if (!err)
@@ -2790,7 +2819,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 		}
 	}
 #endif
-	mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
+	ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+		      ac->ac_b_ex.fe_len);
 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
 		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
 		ext4_free_blks_set(sb, gdp,
@@ -2830,8 +2860,9 @@ out_err:
 
 /*
  * here we normalize request for locality group
- * Group request are normalized to s_strip size if we set the same via mount
- * option. If not we set it to s_mb_group_prealloc which can be configured via
+ * Group request are normalized to s_mb_group_prealloc, which goes to
+ * s_strip if we set the same via mount option.
+ * s_mb_group_prealloc can be configured via
  * /sys/fs/ext4/<partition>/mb_group_prealloc
  *
  * XXX: should we try to preallocate more than the group has now?
@@ -2842,10 +2873,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
 	struct ext4_locality_group *lg = ac->ac_lg;
 
 	BUG_ON(lg == NULL);
-	if (EXT4_SB(sb)->s_stripe)
-		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
-	else
-		ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
+	ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
 	mb_debug(1, "#%u: goal %u blocks for locality group\n",
 		current->pid, ac->ac_g_ex.fe_len);
 }
@@ -3001,9 +3029,10 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
 	if (start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical) {
-		printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
-			(unsigned long) start, (unsigned long) size,
-			(unsigned long) ac->ac_o_ex.fe_logical);
+		ext4_msg(ac->ac_sb, KERN_ERR,
+			 "start %lu, size %lu, fe_logical %lu",
+			 (unsigned long) start, (unsigned long) size,
+			 (unsigned long) ac->ac_o_ex.fe_logical);
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
@@ -3262,7 +3291,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 
 	while (n) {
 		entry = rb_entry(n, struct ext4_free_data, node);
-		mb_set_bits(bitmap, entry->start_blk, entry->count);
+		ext4_set_bits(bitmap, entry->start_blk, entry->count);
 		n = rb_next(n);
 	}
 	return;
@@ -3304,7 +3333,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 		if (unlikely(len == 0))
 			continue;
 		BUG_ON(groupnr != group);
-		mb_set_bits(bitmap, start, len);
+		ext4_set_bits(bitmap, start, len);
 		preallocated += len;
 		count++;
 	}
@@ -3584,10 +3613,11 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 		bit = next + 1;
 	}
 	if (free != pa->pa_free) {
-		printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
-			pa, (unsigned long) pa->pa_lstart,
-			(unsigned long) pa->pa_pstart,
-			(unsigned long) pa->pa_len);
+		ext4_msg(e4b->bd_sb, KERN_CRIT,
+			 "pa %p: logic %lu, phys. %lu, len %lu",
+			 pa, (unsigned long) pa->pa_lstart,
+			 (unsigned long) pa->pa_pstart,
+			 (unsigned long) pa->pa_len);
 		ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
 					free, pa->pa_free);
 		/*
@@ -3775,7 +3805,8 @@ repeat:
 			 * use preallocation while we're discarding it */
 			spin_unlock(&pa->pa_lock);
 			spin_unlock(&ei->i_prealloc_lock);
-			printk(KERN_ERR "uh-oh! used pa while discarding\n");
+			ext4_msg(sb, KERN_ERR,
+				 "uh-oh! used pa while discarding");
 			WARN_ON(1);
 			schedule_timeout_uninterruptible(HZ);
 			goto repeat;
@@ -3852,12 +3883,13 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 	    (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
 		return;
 
-	printk(KERN_ERR "EXT4-fs: Can't allocate:"
-			" Allocation context details:\n");
-	printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
+	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
+			" Allocation context details:");
+	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
 			ac->ac_status, ac->ac_flags);
-	printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
-			"best %lu/%lu/%lu@%lu cr %d\n",
+	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
+		 	"goal %lu/%lu/%lu@%lu, "
+			"best %lu/%lu/%lu@%lu cr %d",
 			(unsigned long)ac->ac_o_ex.fe_group,
 			(unsigned long)ac->ac_o_ex.fe_start,
 			(unsigned long)ac->ac_o_ex.fe_len,
@@ -3871,9 +3903,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 			(unsigned long)ac->ac_b_ex.fe_len,
 			(unsigned long)ac->ac_b_ex.fe_logical,
 			(int)ac->ac_criteria);
-	printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
-		ac->ac_found);
-	printk(KERN_ERR "EXT4-fs: groups: \n");
+	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
+		 ac->ac_ex_scanned, ac->ac_found);
+	ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
 	ngroups = ext4_get_groups_count(sb);
 	for (i = 0; i < ngroups; i++) {
 		struct ext4_group_info *grp = ext4_get_group_info(sb, i);
@@ -4637,7 +4669,7 @@ do_more:
 	}
 	ext4_mark_super_dirty(sb);
 error_return:
-	if (freed)
+	if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
 		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
@@ -4645,7 +4677,7 @@ error_return:
 }
 
 /**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
+ * ext4_group_add_blocks() -- Add given blocks to an existing group
  * @handle:			handle to this transaction
  * @sb:				super block
  * @block:			start physcial block to add to the block group
@@ -4653,7 +4685,7 @@ error_return:
  *
  * This marks the blocks as free in the bitmap and buddy.
  */
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 			 ext4_fsblk_t block, unsigned long count)
 {
 	struct buffer_head *bitmap_bh = NULL;
@@ -4666,25 +4698,35 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 	struct ext4_buddy e4b;
 	int err = 0, ret, blk_free_count;
 	ext4_grpblk_t blocks_freed;
-	struct ext4_group_info *grp;
 
 	ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
 
+	if (count == 0)
+		return 0;
+
 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
-	grp = ext4_get_group_info(sb, block_group);
 	/*
 	 * Check to see if we are freeing blocks across a group
 	 * boundary.
 	 */
-	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
+	if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+		ext4_warning(sb, "too much blocks added to group %u\n",
+			     block_group);
+		err = -EINVAL;
 		goto error_return;
+	}
 
 	bitmap_bh = ext4_read_block_bitmap(sb, block_group);
-	if (!bitmap_bh)
+	if (!bitmap_bh) {
+		err = -EIO;
 		goto error_return;
+	}
+
 	desc = ext4_get_group_desc(sb, block_group, &gd_bh);
-	if (!desc)
+	if (!desc) {
+		err = -EIO;
 		goto error_return;
+	}
 
 	if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
 	    in_range(ext4_inode_bitmap(sb, desc), block, count) ||
@@ -4694,6 +4736,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 		ext4_error(sb, "Adding blocks in system zones - "
 		ext4_error(sb, "Adding blocks in system zones - "
 			   "Block = %llu, count = %lu",
 			   "Block = %llu, count = %lu",
 			   block, count);
 			   block, count);
+		err = -EINVAL;
 		goto error_return;
 		goto error_return;
 	}
 	}
 
 
@@ -4762,7 +4805,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 error_return:
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
-	return;
+	return err;
 }
 
 /**
@@ -4782,6 +4825,8 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
 {
 	struct ext4_free_extent ex;
 
+	trace_ext4_trim_extent(sb, group, start, count);
+
 	assert_spin_locked(ext4_group_lock_ptr(sb, group));
 
 	ex.fe_start = start;
@@ -4802,7 +4847,7 @@ static void ext4_trim_extent(struct super_block *sb, int start, int count,
 /**
  * ext4_trim_all_free -- function to trim all free space in alloc. group
  * @sb:			super block for file system
- * @e4b:		ext4 buddy
+ * @group:		group to be trimmed
  * @start:		first group block to examine
  * @max:		last group block to examine
  * @minblocks:		minimum extent block count
@@ -4823,10 +4868,12 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 		   ext4_grpblk_t minblocks)
 {
 	void *bitmap;
-	ext4_grpblk_t next, count = 0;
+	ext4_grpblk_t next, count = 0, free_count = 0;
 	struct ext4_buddy e4b;
 	int ret;
 
+	trace_ext4_trim_all_free(sb, group, start, max);
+
 	ret = ext4_mb_load_buddy(sb, group, &e4b);
 	if (ret) {
 		ext4_error(sb, "Error in loading buddy "
@@ -4836,6 +4883,10 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 	bitmap = e4b.bd_bitmap;
 
 	ext4_lock_group(sb, group);
+	if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
+	    minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
+		goto out;
+
 	start = (e4b.bd_info->bb_first_free > start) ?
 		e4b.bd_info->bb_first_free : start;
 
@@ -4850,6 +4901,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 					 next - start, group, &e4b);
 			count += next - start;
 		}
+		free_count += next - start;
 		start = next + 1;
 
 		if (fatal_signal_pending(current)) {
@@ -4863,9 +4915,13 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
 			ext4_lock_group(sb, group);
 		}
 
-		if ((e4b.bd_info->bb_free - count) < minblocks)
+		if ((e4b.bd_info->bb_free - free_count) < minblocks)
 			break;
 	}
+
+	if (!ret)
+		EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+out:
 	ext4_unlock_group(sb, group);
 	ext4_mb_unload_buddy(&e4b);
 
@@ -4904,6 +4960,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 
 	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
 		return -EINVAL;
+	if (start + len <= first_data_blk)
+		goto out;
 	if (start < first_data_blk) {
 		len -= first_data_blk - start;
 		start = first_data_blk;
@@ -4952,5 +5010,9 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
 	}
 	range->len = trimmed * sb->s_blocksize;
 
+	if (!ret)
+		atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+
+out:
 	return ret;
 }

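The ext4_mb_init() hunk above rounds the default group preallocation up to the smallest stripe multiple that is at least as large, so group preallocations never straddle RAID stripes. A standalone check of that arithmetic, with roundup() restated the way the kernel macro behaves for positive integers:

#include <stdio.h>

#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned int group_prealloc = 512;	/* MB_DEFAULT_GROUP_PREALLOC */
	unsigned int stripe = 96;		/* e.g. mounted with -o stripe=96 */

	if (stripe > 1)
		group_prealloc = ROUNDUP(group_prealloc, stripe);

	/* 512 -> 576, the smallest multiple of 96 that is >= 512 */
	printf("s_mb_group_prealloc = %u\n", group_prealloc);
	return 0;
}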
+ 0 - 1
fs/ext4/mballoc.h

@@ -187,7 +187,6 @@ struct ext4_allocation_context {
 	__u16 ac_flags;		/* allocation hints */
 	__u8 ac_status;
 	__u8 ac_criteria;
-	__u8 ac_repeats;
 	__u8 ac_2order;		/* if request is to allocate 2^N blocks and
 				 * N > 0, the field stores N, otherwise 0 */
 	__u8 ac_op;		/* operation, for history only */

+ 7 - 14
fs/ext4/namei.c

@@ -289,7 +289,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
 				while (len--) printk("%c", *name++);
 				ext4fs_dirhash(de->name, de->name_len, &h);
 				printk(":%x.%u ", h.hash,
-				       ((char *) de - base));
+				       (unsigned) ((char *) de - base));
 			}
 			space += EXT4_DIR_REC_LEN(de->name_len);
 			names++;
@@ -1013,7 +1013,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
 
 	*err = -ENOENT;
 errout:
-	dxtrace(printk(KERN_DEBUG "%s not found\n", name));
+	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
 	dx_release (frames);
 	return NULL;
 }
@@ -1985,18 +1985,11 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
 		goto out_unlock;
 
-	/* Orphan handling is only valid for files with data blocks
-	 * being truncated, or files being unlinked. */
-
-	/* @@@ FIXME: Observation from aviro:
-	 * I think I can trigger J_ASSERT in ext4_orphan_add().  We block
-	 * here (on s_orphan_lock), so race with ext4_link() which might bump
-	 * ->i_nlink. For, say it, character device. Not a regular file,
-	 * not a directory, not a symlink and ->i_nlink > 0.
-	 *
-	 * tytso, 4/25/2009: I'm not sure how that could happen;
-	 * shouldn't the fs core protect us from these sort of
-	 * unlink()/link() races?
+	/*
+	 * Orphan handling is only valid for files with data blocks
+	 * being truncated, or files being unlinked. Note that we either
+	 * hold i_mutex, or the inode can not be referenced from outside,
+	 * so i_nlink should not be bumped due to race
 	 */
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+ 1 - 5
fs/ext4/page-io.c

@@ -285,11 +285,7 @@ static int io_submit_init(struct ext4_io_submit *io,
 	io_end = ext4_init_io_end(inode, GFP_NOFS);
 	io_end = ext4_init_io_end(inode, GFP_NOFS);
 	if (!io_end)
 		return -ENOMEM;
-	do {
-		bio = bio_alloc(GFP_NOIO, nvecs);
-		nvecs >>= 1;
-	} while (bio == NULL);
-
+	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
 	bio->bi_private = io->io_end = io_end;
+ 103 - 96
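The page-io.c hunk replaces a retry loop, which halved nvecs until bio_alloc() succeeded, with a single allocation clamped to the supported maximum. A toy illustration of the clamp; BIO_MAX_PAGES_DEMO stands in for the kernel's BIO_MAX_PAGES:

#include <stdio.h>

#define BIO_MAX_PAGES_DEMO 256

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

int main(void)
{
	int nvecs = 1024;	/* caller's request, possibly too large */

	/* Instead of retrying ever smaller sizes, bound the request up front. */
	printf("allocating bio with %d vecs\n",
	       min_int(nvecs, BIO_MAX_PAGES_DEMO));	/* prints 256 */
	return 0;
}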
fs/ext4/resize.c

@@ -16,6 +16,35 @@
 
 
 #include "ext4_jbd2.h"
 
+{
+	int ret = 0;
+
+	if (!capable(CAP_SYS_RESOURCE))
+		return -EPERM;
+
+	/*
+	 * We are not allowed to do online-resizing on a filesystem mounted
+	 * with error, because it can destroy the filesystem easily.
+	 */
+	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+		ext4_warning(sb, "There are errors in the filesystem, "
+			     "so online resizing is not allowed\n");
+		return -EPERM;
+	}
+
+	if (test_and_set_bit_lock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags))
+		ret = -EBUSY;
+
+	return ret;
+}
+
+void ext4_resize_end(struct super_block *sb)
+{
+	clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
+	smp_mb__after_clear_bit();
+}
+
 #define outside(b, first, last)	((b) < (first) || (b) >= (last))
 #define inside(b, first, last)	((b) >= (first) && (b) < (last))
 
@@ -118,10 +147,8 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 		brelse(bh);
 		bh = ERR_PTR(err);
 	} else {
-		lock_buffer(bh);
 		memset(bh->b_data, 0, sb->s_blocksize);
 		set_buffer_uptodate(bh);
-		unlock_buffer(bh);
 	}
 
 	return bh;
@@ -132,8 +159,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
  * If that fails, restart the transaction & regain write access for the
  * buffer head which is used for block_bitmap modifications.
  */
-static int extend_or_restart_transaction(handle_t *handle, int thresh,
-					 struct buffer_head *bh)
+static int extend_or_restart_transaction(handle_t *handle, int thresh)
 {
 	int err;
 
@@ -144,9 +170,8 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
 	if (err < 0)
 		return err;
 	if (err) {
-		if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
-			return err;
-		if ((err = ext4_journal_get_write_access(handle, bh)))
+		err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
+		if (err)
 			return err;
 	}
 
@@ -181,21 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb,
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
-	mutex_lock(&sbi->s_resize_lock);
-	if (input->group != sbi->s_groups_count) {
-		err = -EBUSY;
-		goto exit_journal;
-	}
-
-	if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) {
-		err = PTR_ERR(bh);
-		goto exit_journal;
-	}
-
-	if (ext4_bg_has_super(sb, input->group)) {
-		ext4_debug("mark backup superblock %#04llx (+0)\n", start);
-		ext4_set_bit(0, bh->b_data);
-	}
+	BUG_ON(input->group != sbi->s_groups_count);
 
 	/* Copy all of the GDT blocks into the backup in this group */
 	for (i = 0, bit = 1, block = start + 1;
@@ -203,29 +214,26 @@ static int setup_new_group_blocks(struct super_block *sb,
 		struct buffer_head *gdb;
 
 		ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
-
-		if ((err = extend_or_restart_transaction(handle, 1, bh)))
-			goto exit_bh;
+		err = extend_or_restart_transaction(handle, 1);
+		if (err)
+			goto exit_journal;
 
 		gdb = sb_getblk(sb, block);
 		if (!gdb) {
 			err = -EIO;
-			goto exit_bh;
+			goto exit_journal;
 		}
 		if ((err = ext4_journal_get_write_access(handle, gdb))) {
 			brelse(gdb);
-			goto exit_bh;
+			goto exit_journal;
 		}
-		lock_buffer(gdb);
 		memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size);
 		set_buffer_uptodate(gdb);
-		unlock_buffer(gdb);
 		err = ext4_handle_dirty_metadata(handle, NULL, gdb);
 		if (unlikely(err)) {
 			brelse(gdb);
-			goto exit_bh;
+			goto exit_journal;
 		}
-		ext4_set_bit(bit, bh->b_data);
 		brelse(gdb);
 	}
 
@@ -235,9 +243,22 @@ static int setup_new_group_blocks(struct super_block *sb,
 	err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
 			       GFP_NOFS);
 	if (err)
-		goto exit_bh;
-	for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
-		ext4_set_bit(bit, bh->b_data);
+		goto exit_journal;
+
+	err = extend_or_restart_transaction(handle, 2);
+	if (err)
+		goto exit_journal;
+
+	bh = bclean(handle, sb, input->block_bitmap);
+	if (IS_ERR(bh)) {
+		err = PTR_ERR(bh);
+		goto exit_journal;
+	}
+
+	if (ext4_bg_has_super(sb, input->group)) {
+		ext4_debug("mark backup group tables %#04llx (+0)\n", start);
+		ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1);
+	}
 
 	ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
 		   input->block_bitmap - start);
@@ -253,12 +274,9 @@ static int setup_new_group_blocks(struct super_block *sb,
 	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
 	if (err)
 		goto exit_bh;
-	for (i = 0, bit = input->inode_table - start;
-	     i < sbi->s_itb_per_group; i++, bit++)
-		ext4_set_bit(bit, bh->b_data);
+	ext4_set_bits(bh->b_data, input->inode_table - start,
+		      sbi->s_itb_per_group);
 
-	if ((err = extend_or_restart_transaction(handle, 2, bh)))
-		goto exit_bh;
 
 	ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
 			     bh->b_data);
@@ -285,7 +303,6 @@ exit_bh:
 	brelse(bh);
 
 exit_journal:
-	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
 
@@ -377,15 +394,15 @@ static int verify_reserved_gdb(struct super_block *sb,
  * fail once we start modifying the data on disk, because JBD has no rollback.
  */
 static int add_new_gdb(handle_t *handle, struct inode *inode,
-		       struct ext4_new_group_data *input,
-		       struct buffer_head **primary)
+		       ext4_group_t group)
 {
 	struct super_block *sb = inode->i_sb;
 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-	unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
+	unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
 	ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num;
 	struct buffer_head **o_group_desc, **n_group_desc;
 	struct buffer_head *dind;
+	struct buffer_head *gdb_bh;
 	int gdbackups;
 	struct ext4_iloc iloc;
 	__le32 *data;
@@ -408,11 +425,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		return -EPERM;
 	}
 
-	*primary = sb_bread(sb, gdblock);
-	if (!*primary)
+	gdb_bh = sb_bread(sb, gdblock);
+	if (!gdb_bh)
 		return -EIO;
 
-	if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) {
+	gdbackups = verify_reserved_gdb(sb, gdb_bh);
+	if (gdbackups < 0) {
 		err = gdbackups;
 		goto exit_bh;
 	}
@@ -427,7 +445,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	data = (__le32 *)dind->b_data;
 	if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
 		ext4_warning(sb, "new group %u GDT block %llu not reserved",
-			     input->group, gdblock);
+			     group, gdblock);
 		err = -EINVAL;
 		goto exit_dind;
 	}
@@ -436,7 +454,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (unlikely(err))
 		goto exit_dind;
 
-	err = ext4_journal_get_write_access(handle, *primary);
+	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
 		goto exit_sbh;
 
@@ -449,12 +467,13 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	if (unlikely(err))
 		goto exit_dindj;
 
-	n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
-			GFP_NOFS);
+	n_group_desc = ext4_kvmalloc((gdb_num + 1) *
+				     sizeof(struct buffer_head *),
+				     GFP_NOFS);
 	if (!n_group_desc) {
 		err = -ENOMEM;
-		ext4_warning(sb,
-			      "not enough memory for %lu groups", gdb_num + 1);
+		ext4_warning(sb, "not enough memory for %lu groups",
+			     gdb_num + 1);
 		goto exit_inode;
 	}
 
@@ -475,8 +494,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	}
 	inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
 	ext4_mark_iloc_dirty(handle, inode, &iloc);
-	memset((*primary)->b_data, 0, sb->s_blocksize);
-	err = ext4_handle_dirty_metadata(handle, NULL, *primary);
+	memset(gdb_bh->b_data, 0, sb->s_blocksize);
+	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
 	if (unlikely(err)) {
 		ext4_std_error(sb, err);
 		goto exit_inode;
@@ -486,10 +505,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	o_group_desc = EXT4_SB(sb)->s_group_desc;
 	memcpy(n_group_desc, o_group_desc,
 	       EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
-	n_group_desc[gdb_num] = *primary;
+	n_group_desc[gdb_num] = gdb_bh;
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
-	kfree(o_group_desc);
+	ext4_kvfree(o_group_desc);
 
 	le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
 	err = ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
@@ -499,6 +518,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	return err;
 
 exit_inode:
+	ext4_kvfree(n_group_desc);
 	/* ext4_handle_release_buffer(handle, iloc.bh); */
 	brelse(iloc.bh);
 exit_dindj:
@@ -508,7 +528,7 @@ exit_sbh:
 exit_dind:
 	brelse(dind);
 exit_bh:
-	brelse(*primary);
+	brelse(gdb_bh);
 
 	ext4_debug("leaving with error %d\n", err);
 	return err;
@@ -528,7 +548,7 @@ exit_bh:
  * backup GDT blocks are stored in their reserved primary GDT block.
  */
 static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
-			      struct ext4_new_group_data *input)
+			      ext4_group_t group)
 {
 	struct super_block *sb = inode->i_sb;
 	int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
@@ -599,7 +619,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	 * Finally we can add each of the reserved backup GDT blocks from
 	 * the new group to its reserved primary GDT block.
 	 */
-	blk = input->group * EXT4_BLOCKS_PER_GROUP(sb);
+	blk = group * EXT4_BLOCKS_PER_GROUP(sb);
 	for (i = 0; i < reserved_gdb; i++) {
 		int err2;
 		data = (__le32 *)primary[i]->b_data;
@@ -799,13 +819,6 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		goto exit_put;
 	}
 
-	mutex_lock(&sbi->s_resize_lock);
-	if (input->group != sbi->s_groups_count) {
-		ext4_warning(sb, "multiple resizers run on filesystem!");
-		err = -EBUSY;
-		goto exit_journal;
-	}
-
 	if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
 		goto exit_journal;
 
@@ -820,16 +833,25 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 		if ((err = ext4_journal_get_write_access(handle, primary)))
 			goto exit_journal;
 
-		if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) &&
-		    (err = reserve_backup_gdb(handle, inode, input)))
+		if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
+			err = reserve_backup_gdb(handle, inode, input->group);
+			if (err)
+				goto exit_journal;
+		}
+	} else {
+		/*
+		 * Note that we can access new group descriptor block safely
+		 * only if add_new_gdb() succeeds.
+		 */
+		err = add_new_gdb(handle, inode, input->group);
+		if (err)
 			goto exit_journal;
-	} else if ((err = add_new_gdb(handle, inode, input, &primary)))
-		goto exit_journal;
+		primary = sbi->s_group_desc[gdb_num];
+	}
 
         /*
         * OK, now we've set up the new group.  Time to make it active.
         *
-         * We do not lock all allocations via s_resize_lock
         * so we have to be safe wrt. concurrent accesses the group
         * data.  So we need to be careful to set all of the relevant
         * group descriptor data etc. *before* we enable the group.
@@ -886,13 +908,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 *
 	 * The precise rules we use are:
 	 *
-	 * * Writers of s_groups_count *must* hold s_resize_lock
-	 * AND
 	 * * Writers must perform a smp_wmb() after updating all dependent
 	 *   data and before modifying the groups count
 	 *
-	 * * Readers must hold s_resize_lock over the access
-	 * OR
 	 * * Readers must perform an smp_rmb() after reading the groups count
 	 *   and before reading any dependent data.
 	 *
@@ -937,10 +955,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	ext4_handle_dirty_super(handle, sb);
 
 exit_journal:
-	mutex_unlock(&sbi->s_resize_lock);
 	if ((err2 = ext4_journal_stop(handle)) && !err)
 		err = err2;
-	if (!err) {
+	if (!err && primary) {
 		update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
 			       sizeof(struct ext4_super_block));
 		update_backups(sb, primary->b_blocknr, primary->b_data,
@@ -969,16 +986,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 	ext4_grpblk_t add;
 	struct buffer_head *bh;
 	handle_t *handle;
-	int err;
+	int err, err2;
 	ext4_group_t group;
 
-	/* We don't need to worry about locking wrt other resizers just
-	 * yet: we're going to revalidate es->s_blocks_count after
-	 * taking the s_resize_lock below. */
 	o_blocks_count = ext4_blocks_count(es);
 
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
+		printk(KERN_DEBUG "EXT4-fs: extending last group from %llu to %llu blocks\n",
 		       o_blocks_count, n_blocks_count);
 
 	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -995,7 +1009,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 
 	if (n_blocks_count < o_blocks_count) {
 		ext4_warning(sb, "can't shrink FS - resize aborted");
-		return -EBUSY;
+		return -EINVAL;
 	}
 
 	/* Handle the remaining blocks in the last group only. */
@@ -1038,32 +1052,25 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
 		goto exit_put;
 	}
 
-	mutex_lock(&EXT4_SB(sb)->s_resize_lock);
-	if (o_blocks_count != ext4_blocks_count(es)) {
-		ext4_warning(sb, "multiple resizers run on filesystem!");
-		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
-		ext4_journal_stop(handle);
-		err = -EBUSY;
-		goto exit_put;
-	}
-
 	if ((err = ext4_journal_get_write_access(handle,
 						 EXT4_SB(sb)->s_sbh))) {
 		ext4_warning(sb, "error %d on journal write access", err);
-		mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 		ext4_journal_stop(handle);
 		goto exit_put;
 	}
 	ext4_blocks_count_set(es, o_blocks_count + add);
-	mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
 	ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
 	/* We add the blocks to the bitmap and set the group need init bit */
-	ext4_add_groupblocks(handle, sb, o_blocks_count, add);
+	err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
 	ext4_handle_dirty_super(handle, sb);
 	ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
 		   o_blocks_count + add);
-	if ((err = ext4_journal_stop(handle)))
+	err2 = ext4_journal_stop(handle);
+	if (!err && err2)
+		err = err2;
+
+	if (err)
 		goto exit_put;
 
 	if (test_opt(sb, DEBUG))

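Several hunks above replace per-bit ext4_set_bit() loops with the newly exported ext4_set_bits() range helper. A simplified userspace model of its semantics, setting len consecutive bits starting at bit cur in a byte-addressed bitmap (an illustration, not the kernel implementation):

#include <stdio.h>

static void set_bits(unsigned char *bm, int cur, int len)
{
	/* Set bits cur .. cur+len-1, one at a time for clarity; the kernel
	 * helper works word-at-a-time for aligned runs. */
	while (len-- > 0) {
		bm[cur >> 3] |= 1u << (cur & 7);
		cur++;
	}
}

int main(void)
{
	unsigned char bitmap[4] = {0};

	/* e.g. mark a superblock plus two GDT blocks plus one reserved GDT
	 * block at the start of a new group's block bitmap */
	set_bits(bitmap, 0, 4);
	printf("%02x %02x %02x %02x\n",
	       bitmap[0], bitmap[1], bitmap[2], bitmap[3]);	/* 0f 00 00 00 */
	return 0;
}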
+ 58 - 30
fs/ext4/super.c

@@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = {
 #define IS_EXT3_SB(sb) (0)
 #endif
 
+void *ext4_kvmalloc(size_t size, gfp_t flags)
+{
+	void *ret;
+
+	ret = kmalloc(size, flags);
+	if (!ret)
+		ret = __vmalloc(size, flags, PAGE_KERNEL);
+	return ret;
+}
+
+void *ext4_kvzalloc(size_t size, gfp_t flags)
+{
+	void *ret;
+
+	ret = kmalloc(size, flags);
+	if (!ret)
+		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL);
+	return ret;
+}
+
+void ext4_kvfree(void *ptr)
+{
+	if (is_vmalloc_addr(ptr))
+		vfree(ptr);
+	else
+		kfree(ptr);
+
+}
+
 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
 			       struct ext4_group_desc *bg)
 {
@@ -269,6 +298,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
 	journal_t *journal;
 	handle_t  *handle;
 
+	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
 	if (sb->s_flags & MS_RDONLY)
 		return ERR_PTR(-EROFS);
 
@@ -789,11 +819,8 @@ static void ext4_put_super(struct super_block *sb)
 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
-	kfree(sbi->s_group_desc);
-	if (is_vmalloc_addr(sbi->s_flex_groups))
-		vfree(sbi->s_flex_groups);
-	else
-		kfree(sbi->s_flex_groups);
+	ext4_kvfree(sbi->s_group_desc);
+	ext4_kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1976,15 +2003,11 @@ static int ext4_fill_flex_info(struct super_block *sb)
 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
 	size = flex_group_count * sizeof(struct flex_groups);
-	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL);
+	sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
 	if (sbi->s_flex_groups == NULL) {
-		sbi->s_flex_groups = vzalloc(size);
-		if (sbi->s_flex_groups == NULL) {
-			ext4_msg(sb, KERN_ERR,
-				 "not enough memory for %u flex groups",
-				 flex_group_count);
-			goto failed;
-		}
+		ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
+			 flex_group_count);
+		goto failed;
 	}
 
 	for (i = 0; i < sbi->s_groups_count; i++) {
@@ -2383,17 +2406,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
 	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
 	unsigned long stripe_width =
 			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
+	int ret;
 
 	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
-		return sbi->s_stripe;
-
-	if (stripe_width <= sbi->s_blocks_per_group)
-		return stripe_width;
+		ret = sbi->s_stripe;
+	else if (stripe_width <= sbi->s_blocks_per_group)
+		ret = stripe_width;
+	else if (stride <= sbi->s_blocks_per_group)
+		ret = stride;
+	else
+		ret = 0;
 
-	if (stride <= sbi->s_blocks_per_group)
-		return stride;
+	/*
+	 * If the stripe width is 1, this makes no sense and
+	 * we set it to 0 to turn off stripe handling code.
+	 */
+	if (ret <= 1)
+		ret = 0;
 
-	return 0;
+	return ret;
 }
 
 /* sysfs supprt */
@@ -3408,8 +3439,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
-	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
-				    GFP_KERNEL);
+	sbi->s_group_desc = ext4_kvmalloc(db_count *
+					  sizeof(struct buffer_head *),
+					  GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
 		ext4_msg(sb, KERN_ERR, "not enough memory");
 		goto failed_mount;
@@ -3491,7 +3523,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
 	mutex_init(&sbi->s_orphan_lock);
 	mutex_init(&sbi->s_orphan_lock);
-	mutex_init(&sbi->s_resize_lock);
+	sbi->s_resize_flags = 0;
 
 
 	sb->s_root = NULL;
 	sb->s_root = NULL;
 
 
@@ -3741,12 +3773,8 @@ failed_mount_wq:
 	}
 	}
 failed_mount3:
 failed_mount3:
 	del_timer(&sbi->s_err_report);
 	del_timer(&sbi->s_err_report);
-	if (sbi->s_flex_groups) {
-		if (is_vmalloc_addr(sbi->s_flex_groups))
-			vfree(sbi->s_flex_groups);
-		else
-			kfree(sbi->s_flex_groups);
-	}
+	if (sbi->s_flex_groups)
+		ext4_kvfree(sbi->s_flex_groups);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -3756,7 +3784,7 @@ failed_mount3:
 failed_mount2:
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
 		brelse(sbi->s_group_desc[i]);
-	kfree(sbi->s_group_desc);
+	ext4_kvfree(sbi->s_group_desc);
 failed_mount:
 failed_mount:
 	if (sbi->s_proc) {
 	if (sbi->s_proc) {
 		remove_proc_entry(sb->s_id, ext4_proc_root);
 		remove_proc_entry(sb->s_id, ext4_proc_root);
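
The ext4_kvmalloc()/ext4_kvzalloc()/ext4_kvfree() calls above replace the open-coded kmalloc-then-vmalloc fallback being deleted. A minimal sketch of what such helpers look like, assuming they only wrap the fallback visible in the removed lines (the in-tree versions may handle GFP flags differently):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

/* Try a physically contiguous allocation first; fall back to
 * vmalloc for large group-descriptor and flex-group arrays. */
void *ext4_kvmalloc(size_t size, gfp_t flags)
{
	void *ret = kmalloc(size, flags);

	if (!ret)
		ret = __vmalloc(size, flags, PAGE_KERNEL);
	return ret;
}

/* Free with whichever allocator actually provided the memory. */
void ext4_kvfree(void *ptr)
{
	if (is_vmalloc_addr(ptr))
		vfree(ptr);
	else
		kfree(ptr);
}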

+ 43 - 0
fs/ext4/truncate.h

@@ -0,0 +1,43 @@
+/*
+ * linux/fs/ext4/truncate.h
+ *
+ * Common inline functions needed for truncate support
+ */
+
+/*
+ * Truncate blocks that were not used by write. We have to truncate the
+ * pagecache as well so that corresponding buffers get properly unmapped.
+ */
+static inline void ext4_truncate_failed_write(struct inode *inode)
+{
+	truncate_inode_pages(inode->i_mapping, inode->i_size);
+	ext4_truncate(inode);
+}
+
+/*
+ * Work out how many blocks we need to proceed with the next chunk of a
+ * truncate transaction.
+ */
+static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
+{
+	ext4_lblk_t needed;
+
+	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
+
+	/* Give ourselves just enough room to cope with inodes in which
+	 * i_blocks is corrupt: we've seen disk corruptions in the past
+	 * which resulted in random data in an inode which looked enough
+	 * like a regular file for ext4 to try to delete it.  Things
+	 * will go a bit crazy if that happens, but at least we should
+	 * try not to panic the whole kernel. */
+	if (needed < 2)
+		needed = 2;
+
+	/* But we need to bound the transaction so we don't overflow the
+	 * journal. */
+	if (needed > EXT4_MAX_TRANS_DATA)
+		needed = EXT4_MAX_TRANS_DATA;
+
+	return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
+}
+
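
ext4_blocks_for_truncate() exists to size a journal handle for one restartable chunk of a truncate. A hedged usage sketch follows; the function name start_truncate_handle and its error path are illustrative, the real consumer being ext4's truncate code:

/* Illustrative caller: reserve enough credits for one chunk of a
 * truncate before starting the handle. */
static handle_t *start_truncate_handle(struct inode *inode)
{
	handle_t *handle;

	handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode));
	if (IS_ERR(handle))
		ext4_std_error(inode->i_sb, PTR_ERR(handle));
	return handle;
}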

+ 4 - 9
fs/generic_acl.c

@@ -82,18 +82,14 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
 			return PTR_ERR(acl);
 	}
 	if (acl) {
-		mode_t mode;
-
 		error = posix_acl_valid(acl);
 		if (error)
 			goto failed;
 		switch (type) {
 		case ACL_TYPE_ACCESS:
-			mode = inode->i_mode;
-			error = posix_acl_equiv_mode(acl, &mode);
+			error = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (error < 0)
 				goto failed;
-			inode->i_mode = mode;
 			inode->i_ctime = CURRENT_TIME;
 			if (error == 0) {
 				posix_acl_release(acl);
@@ -125,21 +121,20 @@ int
 generic_acl_init(struct inode *inode, struct inode *dir)
 {
 	struct posix_acl *acl = NULL;
-	mode_t mode = inode->i_mode;
 	int error;

-	inode->i_mode = mode & ~current_umask();
 	if (!S_ISLNK(inode->i_mode))
 		acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
 	if (acl) {
 		if (S_ISDIR(inode->i_mode))
 			set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
-		error = posix_acl_create(&acl, GFP_KERNEL, &mode);
+		error = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
 		if (error < 0)
 			return error;
-		inode->i_mode = mode;
 		if (error > 0)
 			set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
+	} else {
+		inode->i_mode &= ~current_umask();
 	}
 	error = 0;


+ 3 - 3
fs/gfs2/acl.c

@@ -72,7 +72,7 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
 	return gfs2_acl_get(GFS2_I(inode), type);
 }

-static int gfs2_set_mode(struct inode *inode, mode_t mode)
+static int gfs2_set_mode(struct inode *inode, umode_t mode)
 {
 	int error = 0;

@@ -117,7 +117,7 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct posix_acl *acl;
-	mode_t mode = inode->i_mode;
+	umode_t mode = inode->i_mode;
 	int error = 0;

 	if (!sdp->sd_args.ar_posix_acl)
@@ -276,7 +276,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
 		goto out_release;

 	if (type == ACL_TYPE_ACCESS) {
-		mode_t mode = inode->i_mode;
+		umode_t mode = inode->i_mode;
 		error = posix_acl_equiv_mode(acl, &mode);

 		if (error <= 0) {

+ 1 - 0
fs/hppfs/hppfs.c

@@ -16,6 +16,7 @@
 #include <linux/statfs.h>
 #include <linux/types.h>
 #include <linux/pid_namespace.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include "os.h"


+ 8 - 5
fs/inode.c

@@ -399,12 +399,12 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval)
 EXPORT_SYMBOL(__insert_inode_hash);

 /**
- *	remove_inode_hash - remove an inode from the hash
+ *	__remove_inode_hash - remove an inode from the hash
  *	@inode: inode to unhash
  *
  *	Remove an inode from the superblock.
  */
-void remove_inode_hash(struct inode *inode)
+void __remove_inode_hash(struct inode *inode)
 {
 	spin_lock(&inode_hash_lock);
 	spin_lock(&inode->i_lock);
@@ -412,7 +412,7 @@ void remove_inode_hash(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&inode_hash_lock);
 }
-EXPORT_SYMBOL(remove_inode_hash);
+EXPORT_SYMBOL(__remove_inode_hash);

 void end_writeback(struct inode *inode)
 {
@@ -454,7 +454,9 @@ static void evict(struct inode *inode)
 	BUG_ON(!(inode->i_state & I_FREEING));
 	BUG_ON(!list_empty(&inode->i_lru));

-	inode_wb_list_del(inode);
+	if (!list_empty(&inode->i_wb_list))
+		inode_wb_list_del(inode);
+
 	inode_sb_list_del(inode);

 	if (op->evict_inode) {
@@ -1328,7 +1330,8 @@ static void iput_final(struct inode *inode)
 	}

 	inode->i_state |= I_FREEING;
-	inode_lru_list_del(inode);
+	if (!list_empty(&inode->i_lru))
+		inode_lru_list_del(inode);
 	spin_unlock(&inode->i_lock);

 	evict(inode);

+ 4 - 1
fs/jbd2/checkpoint.c

@@ -257,9 +257,12 @@ static void
 __flush_batch(journal_t *journal, int *batch_count)
 {
 	int i;
+	struct blk_plug plug;

+	blk_start_plug(&plug);
 	for (i = 0; i < *batch_count; i++)
-		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
+		write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE_SYNC);
+	blk_finish_plug(&plug);

 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = journal->j_chkpt_bhs[i];
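
The blk_start_plug()/blk_finish_plug() pair added above holds the batch of WRITE_SYNC buffers in a per-task list so the block layer can merge adjacent I/O before dispatch. The general shape of the pattern, as a sketch (the buffer array and count are hypothetical):

#include <linux/blkdev.h>
#include <linux/buffer_head.h>

/* Sketch: submit a batch of dirty buffers under one plug so
 * adjacent blocks can be coalesced before being dispatched. */
static void submit_buffer_batch(struct buffer_head **bhs, int count)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);		/* queue I/O, do not dispatch yet */
	for (i = 0; i < count; i++)
		write_dirty_buffer(bhs[i], WRITE_SYNC);
	blk_finish_plug(&plug);		/* flush plugged I/O to the device */
}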

+ 0 - 67
fs/jbd2/journal.c

@@ -2390,73 +2390,6 @@ static void __exit journal_exit(void)
 	jbd2_journal_destroy_caches();
 }

-/* 
- * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 
- * tracing infrastructure to map a dev_t to a device name.
- *
- * The caller should use rcu_read_lock() in order to make sure the
- * device name stays valid until its done with it.  We use
- * rcu_read_lock() as well to make sure we're safe in case the caller
- * gets sloppy, and because rcu_read_lock() is cheap and can be safely
- * nested.
- */
-struct devname_cache {
-	struct rcu_head	rcu;
-	dev_t		device;
-	char		devname[BDEVNAME_SIZE];
-};
-#define CACHE_SIZE_BITS 6
-static struct devname_cache *devcache[1 << CACHE_SIZE_BITS];
-static DEFINE_SPINLOCK(devname_cache_lock);
-
-static void free_devcache(struct rcu_head *rcu)
-{
-	kfree(rcu);
-}
-
-const char *jbd2_dev_to_name(dev_t device)
-{
-	int	i = hash_32(device, CACHE_SIZE_BITS);
-	char	*ret;
-	struct block_device *bd;
-	static struct devname_cache *new_dev;
-
-	rcu_read_lock();
-	if (devcache[i] && devcache[i]->device == device) {
-		ret = devcache[i]->devname;
-		rcu_read_unlock();
-		return ret;
-	}
-	rcu_read_unlock();
-
-	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
-	if (!new_dev)
-		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
-	bd = bdget(device);
-	spin_lock(&devname_cache_lock);
-	if (devcache[i]) {
-		if (devcache[i]->device == device) {
-			kfree(new_dev);
-			bdput(bd);
-			ret = devcache[i]->devname;
-			spin_unlock(&devname_cache_lock);
-			return ret;
-		}
-		call_rcu(&devcache[i]->rcu, free_devcache);
-	}
-	devcache[i] = new_dev;
-	devcache[i]->device = device;
-	if (bd) {
-		bdevname(bd, devcache[i]->devname);
-		bdput(bd);
-	} else
-		__bdevname(device, devcache[i]->devname);
-	ret = devcache[i]->devname;
-	spin_unlock(&devname_cache_lock);
-	return ret;
-}
-EXPORT_SYMBOL(jbd2_dev_to_name);
-
 MODULE_LICENSE("GPL");
 MODULE_LICENSE("GPL");
 module_init(journal_init);
 module_init(journal_init);
 module_exit(journal_exit);
 module_exit(journal_exit);

+ 2 - 2
fs/jffs2/acl.c

@@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	case ACL_TYPE_ACCESS:
 		xprefix = JFFS2_XPREFIX_ACL_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
+			umode_t mode = inode->i_mode;
 			rc = posix_acl_equiv_mode(acl, &mode);
 			if (rc < 0)
 				return rc;
@@ -259,7 +259,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 	return rc;
 }

-int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode)
+int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode)
 {
 	struct posix_acl *acl;
 	int rc;

+ 1 - 1
fs/jffs2/acl.h

@@ -28,7 +28,7 @@ struct jffs2_acl_header {

 struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
 extern int jffs2_acl_chmod(struct inode *);
-extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *);
+extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
 extern int jffs2_init_acl_post(struct inode *);

 extern const struct xattr_handler jffs2_acl_access_xattr_handler;

+ 1 - 1
fs/jffs2/fs.c

@@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)

 /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
    fill in the raw_inode while you're at it. */
-struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri)
+struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri)
 {
 	struct inode *inode;
 	struct super_block *sb = dir_i->i_sb;

+ 1 - 1
fs/jffs2/os-linux.h

@@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *);
 struct inode *jffs2_iget(struct super_block *, unsigned long);
 void jffs2_evict_inode (struct inode *);
 void jffs2_dirty_inode(struct inode *inode, int flags);
-struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode,
+struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode,
 			       struct jffs2_raw_inode *ri);
 int jffs2_statfs (struct dentry *, struct kstatfs *);
 int jffs2_remount_fs (struct super_block *, int *, char *);

+ 1 - 3
fs/jfs/acl.c

@@ -127,16 +127,14 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
 		return PTR_ERR(acl);

 	if (acl) {
-		mode_t mode = inode->i_mode;
 		if (S_ISDIR(inode->i_mode)) {
 			rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl);
 			if (rc)
 				goto cleanup;
 		}
-		rc = posix_acl_create(&acl, GFP_KERNEL, &mode);
+		rc = posix_acl_create(&acl, GFP_KERNEL, &inode->i_mode);
 		if (rc < 0)
 			goto cleanup; /* posix_acl_release(NULL) is no-op */
-		inode->i_mode = mode;
 		if (rc > 0)
 			rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl);
 cleanup:

+ 1 - 3
fs/jfs/xattr.c

@@ -693,8 +693,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 			return rc;
 		}
 		if (acl) {
-			mode_t mode = inode->i_mode;
-			rc = posix_acl_equiv_mode(acl, &mode);
+			rc = posix_acl_equiv_mode(acl, &inode->i_mode);
 			posix_acl_release(acl);
 			if (rc < 0) {
 				printk(KERN_ERR
@@ -702,7 +701,6 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
 				       rc);
 				return rc;
 			}
-			inode->i_mode = mode;
 			mark_inode_dirty(inode);
 		}
 		/*

+ 15 - 9
fs/namei.c

@@ -716,19 +716,25 @@ static int follow_automount(struct path *path, unsigned flags,
 	if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
 		return -EISDIR; /* we actually want to stop here */

-	/* We want to mount if someone is trying to open/create a file of any
-	 * type under the mountpoint, wants to traverse through the mountpoint
-	 * or wants to open the mounted directory.
-	 *
+	/*
 	 * We don't want to mount if someone's just doing a stat and they've
 	 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
 	 * appended a '/' to the name.
 	 */
-	if (!(flags & LOOKUP_FOLLOW) &&
-	    !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
-		       LOOKUP_OPEN | LOOKUP_CREATE)))
-		return -EISDIR;
-
+	if (!(flags & LOOKUP_FOLLOW)) {
+		/* We do, however, want to mount if someone wants to open or
+		 * create a file of any type under the mountpoint, wants to
+		 * traverse through the mountpoint or wants to open the mounted
+		 * directory.
+		 * Also, autofs may mark negative dentries as being automount
+		 * points.  These will need the attentions of the daemon to
+		 * instantiate them before they can be used.
+		 */
+		if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
+			     LOOKUP_OPEN | LOOKUP_CREATE)) &&
+		    path->dentry->d_inode)
+			return -EISDIR;
+	}
 	current->total_link_count++;
 	if (current->total_link_count >= 40)
 		return -ELOOP;

+ 1 - 1
fs/nfs/nfs3acl.c

@@ -415,7 +415,7 @@ fail:
 }

 int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
-		mode_t mode)
+		umode_t mode)
 {
 	struct posix_acl *dfacl, *acl;
 	int error = 0;

+ 3 - 3
fs/nfs/nfs3proc.c

@@ -316,7 +316,7 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		 int flags, struct nfs_open_context *ctx)
 {
 	struct nfs3_createdata *data;
-	mode_t mode = sattr->ia_mode;
+	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;

 	dprintk("NFS call  create %s\n", dentry->d_name.name);
@@ -562,7 +562,7 @@ static int
 nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 {
 	struct nfs3_createdata *data;
-	int mode = sattr->ia_mode;
+	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;

 	dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
@@ -681,7 +681,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		dev_t rdev)
 {
 	struct nfs3_createdata *data;
-	mode_t mode = sattr->ia_mode;
+	umode_t mode = sattr->ia_mode;
 	int status = -ENOMEM;

 	dprintk("NFS call  mknod %s %u:%u\n", dentry->d_name.name,

+ 2 - 2
fs/ocfs2/acl.c

@@ -247,7 +247,7 @@ static int ocfs2_set_acl(handle_t *handle,
 	case ACL_TYPE_ACCESS:
 		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
+			umode_t mode = inode->i_mode;
 			ret = posix_acl_equiv_mode(acl, &mode);
 			if (ret < 0)
 				return ret;
@@ -351,7 +351,7 @@ int ocfs2_init_acl(handle_t *handle,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct posix_acl *acl = NULL;
 	int ret = 0, ret2;
-	mode_t mode;
+	umode_t mode;

 	if (!S_ISLNK(inode->i_mode)) {
 		if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {

+ 8 - 8
fs/posix_acl.c

@@ -149,10 +149,10 @@ posix_acl_valid(const struct posix_acl *acl)
  * file mode permission bits, or else 1. Returns -E... on error.
  */
 int
-posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p)
+posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
 {
 	const struct posix_acl_entry *pa, *pe;
-	mode_t mode = 0;
+	umode_t mode = 0;
 	int not_equiv = 0;

 	FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -188,7 +188,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, mode_t *mode_p)
 * Create an ACL representing the file mode permission bits of an inode.
 */
 struct posix_acl *
-posix_acl_from_mode(mode_t mode, gfp_t flags)
+posix_acl_from_mode(umode_t mode, gfp_t flags)
 {
 	struct posix_acl *acl = posix_acl_alloc(3, flags);
 	if (!acl)
@@ -279,11 +279,11 @@ check_perm:
 * system calls. All permissions that are not granted by the acl are removed.
 * The permissions in the acl are changed to reflect the mode_p parameter.
 */
-static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p)
+static int posix_acl_create_masq(struct posix_acl *acl, umode_t *mode_p)
 {
 	struct posix_acl_entry *pa, *pe;
 	struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
-	mode_t mode = *mode_p;
+	umode_t mode = *mode_p;
 	int not_equiv = 0;

 	/* assert(atomic_read(acl->a_refcount) == 1); */
@@ -336,7 +336,7 @@ static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p)
 /*
 * Modify the ACL for the chmod syscall.
 */
-static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode)
+static int posix_acl_chmod_masq(struct posix_acl *acl, umode_t mode)
 {
 	struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL;
 	struct posix_acl_entry *pa, *pe;
@@ -382,7 +382,7 @@ static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode)
 }

 int
-posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p)
+posix_acl_create(struct posix_acl **acl, gfp_t gfp, umode_t *mode_p)
 {
 	struct posix_acl *clone = posix_acl_clone(*acl, gfp);
 	int err = -ENOMEM;
@@ -400,7 +400,7 @@ posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p)
 EXPORT_SYMBOL(posix_acl_create);

 int
-posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode)
+posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
 {
 	struct posix_acl *clone = posix_acl_clone(*acl, gfp);
 	int err = -ENOMEM;
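
With the prototypes switched from mode_t to umode_t, callers can pass &inode->i_mode straight into these helpers (i_mode is a umode_t), which is exactly what the generic_acl, jfs and reiserfs hunks above exploit. A sketch of the resulting ACL_TYPE_ACCESS set path; my_store_acl() is a hypothetical filesystem-specific xattr write:

/* Sketch: posix_acl_equiv_mode() updates i_mode in place and
 * returns 0 when the ACL is fully expressible as mode bits. */
static int set_access_acl(struct inode *inode, struct posix_acl *acl)
{
	int error = posix_acl_equiv_mode(acl, &inode->i_mode);

	if (error < 0)
		return error;		/* invalid ACL */
	if (error == 0)
		acl = NULL;		/* mode bits suffice; drop the ACL */
	mark_inode_dirty(inode);
	return my_store_acl(inode, acl);	/* hypothetical backend store */
}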

+ 7 - 5
fs/pstore/inode.c

@@ -39,8 +39,9 @@
 #define	PSTORE_NAMELEN	64

 struct pstore_private {
+	struct pstore_info *psi;
+	enum pstore_type_id type;
 	u64	id;
-	int	(*erase)(u64);
 	ssize_t	size;
 	char	data[];
 };
@@ -73,7 +74,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 {
 	struct pstore_private *p = dentry->d_inode->i_private;

-	p->erase(p->id);
+	p->psi->erase(p->type, p->id, p->psi);

 	return simple_unlink(dir, dentry);
 }
@@ -175,8 +176,8 @@ int pstore_is_mounted(void)
 * Set the mtime & ctime to the date that this record was originally stored.
 */
 int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
-			      char *data, size_t size,
-			      struct timespec time, int (*erase)(u64))
+		  char *data, size_t size, struct timespec time,
+		  struct pstore_info *psi)
 {
 	struct dentry		*root = pstore_sb->s_root;
 	struct dentry		*dentry;
@@ -192,8 +193,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
 	private = kmalloc(sizeof *private + size, GFP_KERNEL);
 	if (!private)
 		goto fail_alloc;
+	private->type = type;
 	private->id = id;
-	private->erase = erase;
+	private->psi = psi;

 	switch (type) {
 	case PSTORE_TYPE_DMESG:

+ 1 - 1
fs/pstore/internal.h

@@ -2,5 +2,5 @@ extern void	pstore_set_kmsg_bytes(int);
 extern void	pstore_get_records(void);
 extern int	pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
 			      char *data, size_t size,
-			      struct timespec time, int (*erase)(u64));
+			      struct timespec time, struct pstore_info *psi);
 extern int	pstore_is_mounted(void);

+ 22 - 8
fs/pstore/platform.c

@@ -37,6 +37,8 @@
 static DEFINE_SPINLOCK(pstore_lock);
 static struct pstore_info *psinfo;

+static char *backend;
+
 /* How much of the console log to snapshot */
 static unsigned long kmsg_bytes = 10240;

@@ -67,7 +69,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	unsigned long	size, total = 0;
 	char		*dst, *why;
 	u64		id;
-	int		hsize, part = 1;
+	int		hsize;
+	unsigned int	part = 1;

 	if (reason < ARRAY_SIZE(reason_str))
 		why = reason_str[reason];
@@ -78,7 +81,7 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 	oopscount++;
 	while (total < kmsg_bytes) {
 		dst = psinfo->buf;
-		hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++);
+		hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
 		size = psinfo->bufsize - hsize;
 		dst += hsize;

@@ -94,14 +97,16 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		memcpy(dst, s1 + s1_start, l1_cpy);
 		memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);

-		id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
+		id = psinfo->write(PSTORE_TYPE_DMESG, part,
+				   hsize + l1_cpy + l2_cpy, psinfo);
 		if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
 			pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
 				      psinfo->buf, hsize + l1_cpy + l2_cpy,
-				      CURRENT_TIME, psinfo->erase);
+				      CURRENT_TIME, psinfo);
 		l1 -= l1_cpy;
 		l2 -= l2_cpy;
 		total += l1_cpy + l2_cpy;
+		part++;
 	}
 	mutex_unlock(&psinfo->buf_mutex);
 }
@@ -128,6 +133,12 @@ int pstore_register(struct pstore_info *psi)
 		spin_unlock(&pstore_lock);
 		return -EBUSY;
 	}
+
+	if (backend && strcmp(backend, psi->name)) {
+		spin_unlock(&pstore_lock);
+		return -EINVAL;
+	}
+
 	psinfo = psi;
 	spin_unlock(&pstore_lock);

@@ -166,9 +177,9 @@ void pstore_get_records(void)
 	if (rc)
 		goto out;

-	while ((size = psi->read(&id, &type, &time)) > 0) {
+	while ((size = psi->read(&id, &type, &time, psi)) > 0) {
 		if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
-				  time, psi->erase))
+				  time, psi))
 			failed++;
 	}
 	psi->close(psi);
@@ -196,12 +207,15 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)

 	mutex_lock(&psinfo->buf_mutex);
 	memcpy(psinfo->buf, buf, size);
-	id = psinfo->write(type, size);
+	id = psinfo->write(type, 0, size, psinfo);
 	if (pstore_is_mounted())
 		pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
-			      size, CURRENT_TIME, psinfo->erase);
+			      size, CURRENT_TIME, psinfo);
 	mutex_unlock(&psinfo->buf_mutex);

 	return 0;
 }
 EXPORT_SYMBOL_GPL(pstore_write);
+
+module_param(backend, charp, 0444);
+MODULE_PARM_DESC(backend, "Pstore backend to use");

+ 2 - 8
fs/reiserfs/xattr_acl.c

@@ -272,12 +272,10 @@ reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
 	case ACL_TYPE_ACCESS:
 		name = POSIX_ACL_XATTR_ACCESS;
 		if (acl) {
-			mode_t mode = inode->i_mode;
-			error = posix_acl_equiv_mode(acl, &mode);
+			error = posix_acl_equiv_mode(acl, &inode->i_mode);
 			if (error < 0)
 				return error;
 			else {
-				inode->i_mode = mode;
 				if (error == 0)
 					acl = NULL;
 			}
@@ -354,8 +352,6 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
 		return PTR_ERR(acl);

 	if (acl) {
-		mode_t mode = inode->i_mode;
-
 		/* Copy the default ACL to the default ACL of a new directory */
 		if (S_ISDIR(inode->i_mode)) {
 			err = reiserfs_set_acl(th, inode, ACL_TYPE_DEFAULT,
@@ -366,12 +362,10 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,

 		/* Now we reconcile the new ACL and the mode,
 		   potentially modifying both */
-		err = posix_acl_create(&acl, GFP_NOFS, &mode);
+		err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
 		if (err < 0)
 			return err;

-		inode->i_mode = mode;
-
 		/* If we need an ACL.. */
 		if (err > 0)
 			err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl);

+ 3 - 3
fs/xfs/linux-2.6/xfs_acl.c

@@ -221,7 +221,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 }

 static int
-xfs_set_mode(struct inode *inode, mode_t mode)
+xfs_set_mode(struct inode *inode, umode_t mode)
 {
 	int error = 0;

@@ -267,7 +267,7 @@ posix_acl_default_exists(struct inode *inode)
 int
 xfs_inherit_acl(struct inode *inode, struct posix_acl *acl)
 {
-	mode_t mode = inode->i_mode;
+	umode_t mode = inode->i_mode;
 	int error = 0, inherit = 0;

 	if (S_ISDIR(inode->i_mode)) {
@@ -381,7 +381,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
 		goto out_release;

 	if (type == ACL_TYPE_ACCESS) {
-		mode_t mode = inode->i_mode;
+		umode_t mode = inode->i_mode;
 		error = posix_acl_equiv_mode(acl, &mode);

 		if (error <= 0) {

+ 6 - 3
include/linux/amba/pl08x.h

@@ -172,8 +172,11 @@ struct pl08x_dma_chan {
 	int phychan_hold;
 	struct tasklet_struct tasklet;
 	char *name;
-	struct pl08x_channel_data *cd;
-	dma_addr_t runtime_addr;
+	const struct pl08x_channel_data *cd;
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	u32 src_cctl;
+	u32 dst_cctl;
 	enum dma_data_direction	runtime_direction;
 	dma_cookie_t lc;
 	struct list_head pend_list;
@@ -202,7 +205,7 @@ struct pl08x_dma_chan {
 * @mem_buses: buses which memory can be accessed from: PL08X_AHB1 | PL08X_AHB2
 */
 struct pl08x_platform_data {
-	struct pl08x_channel_data *slave_channels;
+	const struct pl08x_channel_data *slave_channels;
 	unsigned int num_slave_channels;
 	struct pl08x_channel_data memcpy_channel;
 	int (*get_signal)(struct pl08x_dma_chan *);

+ 6 - 0
include/linux/efi.h

@@ -19,6 +19,7 @@
 #include <linux/rtc.h>
 #include <linux/ioport.h>
 #include <linux/pfn.h>
+#include <linux/pstore.h>

 #include <asm/page.h>
 #include <asm/system.h>
@@ -232,6 +233,9 @@ typedef efi_status_t efi_query_capsule_caps_t(efi_capsule_header_t **capsules,
 #define UV_SYSTEM_TABLE_GUID \
     EFI_GUID(  0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93 )

+#define LINUX_EFI_CRASH_GUID \
+    EFI_GUID(  0xcfc8fc79, 0xbe2e, 0x4ddc, 0x97, 0xf0, 0x9f, 0x98, 0xbf, 0xe2, 0x98, 0xa0 )
+
 typedef struct {
 	efi_guid_t guid;
 	unsigned long table;
@@ -458,6 +462,8 @@ struct efivars {
 	struct kset *kset;
 	struct bin_attribute *new_var, *del_var;
 	const struct efivar_operations *ops;
+	struct efivar_entry *walk_entry;
+	struct pstore_info efi_pstore_info;
 };

 int register_efivars(struct efivars *efivars,

+ 8 - 1
include/linux/fs.h

@@ -2317,11 +2317,18 @@ extern int should_remove_suid(struct dentry *);
 extern int file_remove_suid(struct file *);

 extern void __insert_inode_hash(struct inode *, unsigned long hashval);
-extern void remove_inode_hash(struct inode *);
 static inline void insert_inode_hash(struct inode *inode)
 {
 	__insert_inode_hash(inode, inode->i_ino);
 }
+
+extern void __remove_inode_hash(struct inode *);
+static inline void remove_inode_hash(struct inode *inode)
+{
+	if (!inode_unhashed(inode))
+		__remove_inode_hash(inode);
+}
+
 extern void inode_sb_list_add(struct inode *inode);

 #ifdef CONFIG_BLOCK
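
The split keeps remove_inode_hash() lock-free for inodes that were never hashed: inode_unhashed() is an existing inline in the same header, essentially

static inline int inode_unhashed(struct inode *inode)
{
	return hlist_unhashed(&inode->i_hash);
}

so the common case costs one list-empty test, and only hashed inodes pay for the spinlocks in __remove_inode_hash().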

+ 0 - 6
include/linux/jbd2.h

@@ -1329,12 +1329,6 @@ extern int jbd_blocks_per_page(struct inode *inode);
 #define BUFFER_TRACE2(bh, bh2, info)	do {} while (0)
 #define JBUFFER_TRACE(jh, info)	do {} while (0)

-/* 
- * jbd2_dev_to_name is a utility function used by the jbd2 and ext4 
- * tracing infrastructure to map a dev_t to a device name.
- */
-extern const char *jbd2_dev_to_name(dev_t device);
-
 #endif	/* __KERNEL__ */

 #endif	/* _LINUX_JBD2_H */

+ 2 - 2
include/linux/nfs_fs.h

@@ -569,12 +569,12 @@ extern struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type);
 extern int nfs3_proc_setacl(struct inode *inode, int type,
 			    struct posix_acl *acl);
 extern int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode,
-		mode_t mode);
+		umode_t mode);
 extern void nfs3_forget_cached_acls(struct inode *inode);
 #else
 static inline int nfs3_proc_set_default_acl(struct inode *dir,
 					    struct inode *inode,
-					    mode_t mode)
+					    umode_t mode)
 {
 	return 0;
 }

+ 4 - 4
include/linux/posix_acl.h

@@ -75,10 +75,10 @@ extern void posix_acl_init(struct posix_acl *, int);
 extern struct posix_acl *posix_acl_alloc(int, gfp_t);
 extern int posix_acl_valid(const struct posix_acl *);
 extern int posix_acl_permission(struct inode *, const struct posix_acl *, int);
-extern struct posix_acl *posix_acl_from_mode(mode_t, gfp_t);
-extern int posix_acl_equiv_mode(const struct posix_acl *, mode_t *);
-extern int posix_acl_create(struct posix_acl **, gfp_t, mode_t *);
-extern int posix_acl_chmod(struct posix_acl **, gfp_t, mode_t);
+extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
+extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
+extern int posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
+extern int posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);

 extern struct posix_acl *get_posix_acl(struct inode *, int);
 extern int set_posix_acl(struct inode *, int, struct posix_acl *);

+ 6 - 3
include/linux/pstore.h

@@ -38,9 +38,12 @@ struct pstore_info {
 	int		(*open)(struct pstore_info *psi);
 	int		(*close)(struct pstore_info *psi);
 	ssize_t		(*read)(u64 *id, enum pstore_type_id *type,
-			struct timespec *time);
-	u64		(*write)(enum pstore_type_id type, size_t size);
-	int		(*erase)(u64 id);
+			struct timespec *time, struct pstore_info *psi);
+	u64		(*write)(enum pstore_type_id type, unsigned int part,
+			size_t size, struct pstore_info *psi);
+	int		(*erase)(enum pstore_type_id type, u64 id,
+			struct pstore_info *psi);
+	void		*data;
 };

 #ifdef CONFIG_PSTORE
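
Every callback now receives the owning pstore_info, so a backend can reach its private state through psi (or psi->data) instead of globals, and erase() can distinguish record types. A minimal backend sketch against the new signatures (the "demo" names and empty bodies are illustrative; open/close callbacks, buf/bufsize setup and locking are omitted):

#include <linux/pstore.h>

static ssize_t demo_read(u64 *id, enum pstore_type_id *type,
			 struct timespec *time, struct pstore_info *psi)
{
	/* fill *id, *type, *time and psi->buf; return record size */
	return 0;	/* 0 means no more records */
}

static u64 demo_write(enum pstore_type_id type, unsigned int part,
		      size_t size, struct pstore_info *psi)
{
	/* persist size bytes from psi->buf; part numbers multi-part dumps */
	return 0;	/* record id */
}

static int demo_erase(enum pstore_type_id type, u64 id,
		      struct pstore_info *psi)
{
	/* drop record (type, id) from the backing store */
	return 0;
}

static struct pstore_info demo_info = {
	.name	= "demo",
	.read	= demo_read,
	.write	= demo_write,
	.erase	= demo_erase,
};

The backend would register with pstore_register(&demo_info); with the check added to platform.c above, it only wins if no backend= module parameter names a different driver.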

+ 3 - 0
include/linux/regulator/consumer.h

@@ -122,6 +122,9 @@ struct regulator;
 struct regulator_bulk_data {
 	const char *supply;
 	struct regulator *consumer;
+
+	/* Internal use */
+	int ret;
 };

 #if defined(CONFIG_REGULATOR)

+ 1 - 3
include/linux/regulator/driver.h

@@ -188,18 +188,16 @@ struct regulator_dev {

 	/* lists we belong to */
 	struct list_head list; /* list of all regulators */
-	struct list_head slist; /* list of supplied regulators */

 	/* lists we own */
 	struct list_head consumer_list; /* consumers we supply */
-	struct list_head supply_list; /* regulators we supply */

 	struct blocking_notifier_head notifier;
 	struct mutex mutex; /* consumer lock */
 	struct module *owner;
 	struct device dev;
 	struct regulation_constraints *constraints;
-	struct regulator_dev *supply;	/* for tree */
+	struct regulator *supply;	/* for tree */

 	void *reg_data;		/* regulator_dev data */


+ 79 - 8
include/trace/events/ext4.h

@@ -23,7 +23,7 @@ TRACE_EVENT(ext4_free_inode,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16, mode			)
		__field(	uid_t,	uid			)
		__field(	gid_t,	gid			)
		__field(	__u64, blocks			)
@@ -52,7 +52,7 @@ TRACE_EVENT(ext4_request_inode,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	dir			)
-		__field(	umode_t, mode			)
+		__field(	__u16, mode			)
	),

	TP_fast_assign(
@@ -75,7 +75,7 @@ TRACE_EVENT(ext4_allocate_inode,
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
		__field(	ino_t,	dir			)
-		__field(	umode_t, mode			)
+		__field(	__u16,	mode			)
	),

	TP_fast_assign(
@@ -725,7 +725,7 @@ TRACE_EVENT(ext4_free_blocks,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16,	mode			)
		__field(	__u64,	block			)
		__field(	unsigned long,	count		)
		__field(	int,	flags			)
@@ -1012,7 +1012,7 @@ TRACE_EVENT(ext4_forget,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16,	mode			)
		__field(	int,	is_metadata		)
		__field(	__u64,	block			)
	),
@@ -1039,7 +1039,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16,	mode			)
		__field(	__u64,	i_blocks		)
		__field(	int,	used_blocks		)
		__field(	int,	reserved_data_blocks	)
@@ -1076,7 +1076,7 @@ TRACE_EVENT(ext4_da_reserve_space,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16,  mode			)
		__field(	__u64,	i_blocks		)
		__field(	int,	md_needed		)
		__field(	int,	reserved_data_blocks	)
@@ -1110,7 +1110,7 @@ TRACE_EVENT(ext4_da_release_space,
	TP_STRUCT__entry(
		__field(	dev_t,	dev			)
		__field(	ino_t,	ino			)
-		__field(	umode_t, mode			)
+		__field(	__u16,  mode			)
		__field(	__u64,	i_blocks		)
		__field(	int,	freed_blocks		)
		__field(	int,	reserved_data_blocks	)
@@ -1518,6 +1518,77 @@ TRACE_EVENT(ext4_load_inode,
		  (unsigned long) __entry->ino)
 );

+TRACE_EVENT(ext4_journal_start,
+	TP_PROTO(struct super_block *sb, int nblocks, unsigned long IP),
+
+	TP_ARGS(sb, nblocks, IP),
+
+	TP_STRUCT__entry(
+		__field(	dev_t,	dev			)
+		__field(	  int, 	nblocks			)
+		__field(unsigned long,	ip			)
+	),
+
+	TP_fast_assign(
+		__entry->dev	 = sb->s_dev;
+		__entry->nblocks = nblocks;
+		__entry->ip	 = IP;
+	),
+
+	TP_printk("dev %d,%d nblocks %d caller %pF",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->nblocks, (void *)__entry->ip)
+);
+
+DECLARE_EVENT_CLASS(ext4__trim,
+	TP_PROTO(struct super_block *sb,
+		 ext4_group_t group,
+		 ext4_grpblk_t start,
+		 ext4_grpblk_t len),
+
+	TP_ARGS(sb, group, start, len),
+
+	TP_STRUCT__entry(
+		__field(	int,	dev_major		)
+		__field(	int,	dev_minor		)
+		__field(	__u32, 	group			)
+		__field(	int,	start			)
+		__field(	int,	len			)
+	),
+
+	TP_fast_assign(
+		__entry->dev_major	= MAJOR(sb->s_dev);
+		__entry->dev_minor	= MINOR(sb->s_dev);
+		__entry->group		= group;
+		__entry->start		= start;
+		__entry->len		= len;
+	),
+
+	TP_printk("dev %d,%d group %u, start %d, len %d",
+		  __entry->dev_major, __entry->dev_minor,
+		  __entry->group, __entry->start, __entry->len)
+);
+
+DEFINE_EVENT(ext4__trim, ext4_trim_extent,
+
+	TP_PROTO(struct super_block *sb,
+		 ext4_group_t group,
+		 ext4_grpblk_t start,
+		 ext4_grpblk_t len),
+
+	TP_ARGS(sb, group, start, len)
+);
+
+DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
+
+	TP_PROTO(struct super_block *sb,
+		 ext4_group_t group,
+		 ext4_grpblk_t start,
+		 ext4_grpblk_t len),
+
+	TP_ARGS(sb, group, start, len)
+);
+
 #endif /* _TRACE_EXT4_H */

 /* This part must be outside protection */
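
TRACE_EVENT(ext4_journal_start, ...) above generates a trace_ext4_journal_start() call point; passing the caller's return address is what lets the "caller %pF" format print who started the handle. A plausible call site in ext4's journal-start wrapper (a sketch; the real function carries extra read-only and abort checks):

handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
	trace_ext4_journal_start(sb, nblocks, _RET_IP_);
	/* ... error and read-only checks elided ... */
	return jbd2_journal_start(EXT4_SB(sb)->s_journal, nblocks);
}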

+ 19 - 17
include/trace/events/jbd2.h

@@ -26,8 +26,8 @@ TRACE_EVENT(jbd2_checkpoint,
		__entry->result		= result;
	),

-	TP_printk("dev %s result %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->result)
+	TP_printk("dev %d,%d result %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->result)
 );

 DECLARE_EVENT_CLASS(jbd2_commit,
@@ -48,9 +48,9 @@ DECLARE_EVENT_CLASS(jbd2_commit,
		__entry->transaction	= commit_transaction->t_tid;
	),

-	TP_printk("dev %s transaction %d sync %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit)
+	TP_printk("dev %d,%d transaction %d sync %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->transaction, __entry->sync_commit)
 );

 DEFINE_EVENT(jbd2_commit, jbd2_start_commit,
@@ -100,9 +100,9 @@ TRACE_EVENT(jbd2_end_commit,
		__entry->head		= journal->j_tail_sequence;
	),

-	TP_printk("dev %s transaction %d sync %d head %d",
-		  jbd2_dev_to_name(__entry->dev), __entry->transaction,
-		  __entry->sync_commit, __entry->head)
+	TP_printk("dev %d,%d transaction %d sync %d head %d",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->transaction, __entry->sync_commit, __entry->head)
 );

 TRACE_EVENT(jbd2_submit_inode_data,
@@ -120,8 +120,9 @@ TRACE_EVENT(jbd2_submit_inode_data,
		__entry->ino	= inode->i_ino;
	),

-	TP_printk("dev %s ino %lu",
-		  jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
+	TP_printk("dev %d,%d ino %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  (unsigned long) __entry->ino)
 );

 TRACE_EVENT(jbd2_run_stats,
@@ -156,9 +157,9 @@ TRACE_EVENT(jbd2_run_stats,
		__entry->blocks_logged	= stats->rs_blocks_logged;
	),

-	TP_printk("dev %s tid %lu wait %u running %u locked %u flushing %u "
+	TP_printk("dev %d,%d tid %lu wait %u running %u locked %u flushing %u "
		  "logging %u handle_count %u blocks %u blocks_logged %u",
-		  jbd2_dev_to_name(__entry->dev), __entry->tid,
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
		  jiffies_to_msecs(__entry->wait),
		  jiffies_to_msecs(__entry->running),
		  jiffies_to_msecs(__entry->locked),
@@ -192,9 +193,9 @@ TRACE_EVENT(jbd2_checkpoint_stats,
		__entry->dropped	= stats->cs_dropped;
	),

-	TP_printk("dev %s tid %lu chp_time %u forced_to_close %u "
+	TP_printk("dev %d,%d tid %lu chp_time %u forced_to_close %u "
		  "written %u dropped %u",
-		  jbd2_dev_to_name(__entry->dev), __entry->tid,
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid,
		  jiffies_to_msecs(__entry->chp_time),
		  __entry->forced_to_close, __entry->written, __entry->dropped)
 );
@@ -222,9 +223,10 @@ TRACE_EVENT(jbd2_cleanup_journal_tail,
		__entry->freed		= freed;
	),

-	TP_printk("dev %s from %u to %u offset %lu freed %lu",
-		  jbd2_dev_to_name(__entry->dev), __entry->tail_sequence,
-		  __entry->first_tid, __entry->block_nr, __entry->freed)
+	TP_printk("dev %d,%d from %u to %u offset %lu freed %lu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->tail_sequence, __entry->first_tid,
+		  __entry->block_nr, __entry->freed)
 );

 #endif /* _TRACE_JBD2_H */

+ 15 - 7
kernel/debug/gdbstub.c

@@ -42,6 +42,8 @@
 /* Our I/O buffers. */
 static char			remcom_in_buffer[BUFMAX];
 static char			remcom_out_buffer[BUFMAX];
+static int			gdbstub_use_prev_in_buf;
+static int			gdbstub_prev_in_buf_pos;

 /* Storage for the registers, in GDB format. */
 static unsigned long		gdb_regs[(NUMREGBYTES +
@@ -58,6 +60,13 @@ static int gdbstub_read_wait(void)
 	int ret = -1;
 	int i;

+	if (unlikely(gdbstub_use_prev_in_buf)) {
+		if (gdbstub_prev_in_buf_pos < gdbstub_use_prev_in_buf)
+			return remcom_in_buffer[gdbstub_prev_in_buf_pos++];
+		else
+			gdbstub_use_prev_in_buf = 0;
+	}
+
 	/* poll any additional I/O interfaces that are defined */
 	while (ret < 0)
 		for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
@@ -109,7 +118,6 @@ static void get_packet(char *buffer)
 			buffer[count] = ch;
 			count = count + 1;
 		}
-		buffer[count] = 0;

 		if (ch == '#') {
 			xmitcsum = hex_to_bin(gdbstub_read_wait()) << 4;
@@ -124,6 +132,7 @@ static void get_packet(char *buffer)
 			if (dbg_io_ops->flush)
 				dbg_io_ops->flush();
 		}
+		buffer[count] = 0;
 	} while (checksum != xmitcsum);
 }

@@ -1082,12 +1091,11 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
 	case 'c':
 		strcpy(remcom_in_buffer, cmd);
 		return 0;
-	case '?':
-		gdb_cmd_status(ks);
-		break;
-	case '\0':
-		strcpy(remcom_out_buffer, "");
-		break;
+	case '$':
+		strcpy(remcom_in_buffer, cmd);
+		gdbstub_use_prev_in_buf = strlen(remcom_in_buffer);
+		gdbstub_prev_in_buf_pos = 0;
+		return 0;
 	}
 	dbg_io_ops->write_char('+');
 	put_packet(remcom_out_buffer);
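
The '$' case added above re-feeds a complete GDB serial packet, captured by kdb, back through gdbstub_read_wait(). Packets are framed as $<payload>#<checksum>, where the checksum is the payload bytes summed modulo 256 and printed as two hex digits, which is why "$?#3f" and "$qSupported#37" appear as literals in the kdb_io.c hunk below. A standalone check of that framing:

#include <stdio.h>

/* Sum the payload modulo 256, per the GDB remote serial protocol. */
static unsigned char gdb_checksum(const char *p)
{
	unsigned char sum = 0;

	while (*p)
		sum += (unsigned char)*p++;
	return sum;
}

int main(void)
{
	printf("$%s#%02x\n", "?", gdb_checksum("?"));			/* $?#3f */
	printf("$%s#%02x\n", "qSupported", gdb_checksum("qSupported"));	/* $qSupported#37 */
	return 0;
}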

+ 2 - 3
kernel/debug/kdb/kdb_bt.c

@@ -112,9 +112,8 @@ kdb_bt(int argc, const char **argv)
 	unsigned long addr;
 	long offset;

-	kdbgetintenv("BTARGS", &argcount);	/* Arguments to print */
-	kdbgetintenv("BTAPROMPT", &btaprompt);	/* Prompt after each
-						 * proc in bta */
+	/* Prompt after each proc in bta */
+	kdbgetintenv("BTAPROMPT", &btaprompt);

 	if (strcmp(argv[0], "bta") == 0) {
 		struct task_struct *g, *p;

+ 0 - 4
kernel/debug/kdb/kdb_cmds

@@ -18,16 +18,12 @@ defcmd dumpcommon "" "Common kdb debugging"
 endefcmd

 defcmd dumpall "" "First line debugging"
-  set BTSYMARG 1
-  set BTARGS 9
   pid R
   -dumpcommon
   -bta
 endefcmd

 defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
-  set BTSYMARG 1
-  set BTARGS 9
   pid R
   -dumpcommon
   -btc

+ 8 - 13
kernel/debug/kdb/kdb_debugger.c

@@ -30,6 +30,8 @@ EXPORT_SYMBOL_GPL(kdb_poll_funcs);
 int kdb_poll_idx = 1;
 EXPORT_SYMBOL_GPL(kdb_poll_idx);

+static struct kgdb_state *kdb_ks;
+
 int kdb_stub(struct kgdb_state *ks)
 {
 	int error = 0;
@@ -39,6 +41,7 @@ int kdb_stub(struct kgdb_state *ks)
 	kdb_dbtrap_t db_result = KDB_DB_NOBPT;
 	int i;

+	kdb_ks = ks;
 	if (KDB_STATE(REENTRY)) {
 		reason = KDB_REASON_SWITCH;
 		KDB_STATE_CLEAR(REENTRY);
@@ -123,20 +126,8 @@ int kdb_stub(struct kgdb_state *ks)
 	KDB_STATE_CLEAR(PAGER);
 	kdbnearsym_cleanup();
 	if (error == KDB_CMD_KGDB) {
-		if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
-	/*
-	 * This inteface glue which allows kdb to transition in into
-	 * the gdb stub.  In order to do this the '?' or '' gdb serial
-	 * packet response is processed here.  And then control is
-	 * passed to the gdbstub.
-	 */
-			if (KDB_STATE(DOING_KGDB))
-				gdbstub_state(ks, "?");
-			else
-				gdbstub_state(ks, "");
+		if (KDB_STATE(DOING_KGDB))
 			KDB_STATE_CLEAR(DOING_KGDB);
-			KDB_STATE_CLEAR(DOING_KGDB2);
-		}
 		return DBG_PASS_EVENT;
 	}
 	kdb_bp_install(ks->linux_regs);
@@ -166,3 +157,7 @@ int kdb_stub(struct kgdb_state *ks)
 	return kgdb_info[ks->cpu].ret_state;
 }

+void kdb_gdb_state_pass(char *buf)
+{
+	gdbstub_state(kdb_ks, buf);
+}

+ 25 - 11
kernel/debug/kdb/kdb_io.c

@@ -31,15 +31,21 @@ char kdb_prompt_str[CMD_BUFLEN];
 
 
 int kdb_trap_printk;
 int kdb_trap_printk;
 
 
-static void kgdb_transition_check(char *buffer)
+static int kgdb_transition_check(char *buffer)
 {
 {
-	int slen = strlen(buffer);
-	if (strncmp(buffer, "$?#3f", slen) != 0 &&
-	    strncmp(buffer, "$qSupported#37", slen) != 0 &&
-	    strncmp(buffer, "+$qSupported#37", slen) != 0) {
+	if (buffer[0] != '+' && buffer[0] != '$') {
 		KDB_STATE_SET(KGDB_TRANS);
 		KDB_STATE_SET(KGDB_TRANS);
 		kdb_printf("%s", buffer);
 		kdb_printf("%s", buffer);
+	} else {
+		int slen = strlen(buffer);
+		if (slen > 3 && buffer[slen - 3] == '#') {
+			kdb_gdb_state_pass(buffer);
+			strcpy(buffer, "kgdb");
+			KDB_STATE_SET(DOING_KGDB);
+			return 1;
+		}
 	}
 	}
+	return 0;
 }
 }
 
 
 static int kdb_read_get_key(char *buffer, size_t bufsize)
 static int kdb_read_get_key(char *buffer, size_t bufsize)
@@ -251,6 +257,10 @@ poll_again:
 	case 13: /* enter */
 		*lastchar++ = '\n';
 		*lastchar++ = '\0';
+		if (!KDB_STATE(KGDB_TRANS)) {
+			KDB_STATE_SET(KGDB_TRANS);
+			kdb_printf("%s", buffer);
+		}
 		kdb_printf("\n");
 		return buffer;
 	case 4: /* Del */
@@ -382,22 +392,26 @@ poll_again:
 				 * printed characters if we think that
 				 * kgdb is connecting, until the check
 				 * fails */
-				if (!KDB_STATE(KGDB_TRANS))
-					kgdb_transition_check(buffer);
-				else
+				if (!KDB_STATE(KGDB_TRANS)) {
+					if (kgdb_transition_check(buffer))
+						return buffer;
+				} else {
 					kdb_printf("%c", key);
+				}
 			}
 			/* Special escape to kgdb */
 			if (lastchar - buffer >= 5 &&
 			    strcmp(lastchar - 5, "$?#3f") == 0) {
+				kdb_gdb_state_pass(lastchar - 5);
 				strcpy(buffer, "kgdb");
 				KDB_STATE_SET(DOING_KGDB);
 				return buffer;
 			}
-			if (lastchar - buffer >= 14 &&
-			    strcmp(lastchar - 14, "$qSupported#37") == 0) {
+			if (lastchar - buffer >= 11 &&
+			    strcmp(lastchar - 11, "$qSupported") == 0) {
+				kdb_gdb_state_pass(lastchar - 11);
 				strcpy(buffer, "kgdb");
-				KDB_STATE_SET(DOING_KGDB2);
+				KDB_STATE_SET(DOING_KGDB);
 				return buffer;
 			}
 		}

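Note: the kdb_io.c rewrite stops matching whole gdb packets byte for byte ("$?#3f", "$qSupported#37") and instead keys on the gdb remote serial framing itself: a packet is $<payload>#<xx>, where xx is the modulo-256 sum of the payload bytes in hex, which is exactly what the new slen > 3 && buffer[slen - 3] == '#' test detects. A small standalone illustration of that framing (userspace C, not kernel code):

    #include <stdio.h>

    /* Frame a payload as a gdb remote serial packet: $<payload>#<checksum>,
     * where checksum = sum of payload bytes modulo 256, as two hex digits. */
    static void frame_packet(const char *payload, char *out)
    {
    	unsigned int sum = 0;
    	const char *p;

    	for (p = payload; *p; p++)
    		sum += (unsigned char)*p;
    	sprintf(out, "$%s#%02x", payload, sum & 0xff);
    }

    int main(void)
    {
    	char pkt[64];

    	frame_packet("?", pkt);			/* -> "$?#3f", kdb_read()'s escape */
    	printf("%s\n", pkt);
    	frame_packet("qSupported", pkt);	/* -> "$qSupported#37" */
    	printf("%s\n", pkt);
    	frame_packet("D", pkt);			/* -> "$D#44", cf. kdb_main.c's hint */
    	printf("%s\n", pkt);
    	return 0;
    }
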
+ 2 - 2
kernel/debug/kdb/kdb_main.c

@@ -145,7 +145,6 @@ static char *__env[] = {
 #endif
  "RADIX=16",
  "MDCOUNT=8",			/* lines of md output */
- "BTARGS=9",			/* 9 possible args in bt */
  KDB_PLATFORM_ENV,
  "DTABCOUNT=30",
  "NOSECT=1",
@@ -172,6 +171,7 @@ static char *__env[] = {
  (char *)0,
  (char *)0,
  (char *)0,
+ (char *)0,
 };
 
 static const int __nenv = (sizeof(__env) / sizeof(char *));
@@ -1386,7 +1386,7 @@ int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
 		}
 
 		if (result == KDB_CMD_KGDB) {
-			if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)))
+			if (!KDB_STATE(DOING_KGDB))
 				kdb_printf("Entering please attach debugger "
 					   "or use $D#44+ or $3#33\n");
 			break;

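Note: the paired __env edits in kdb_main.c belong together. Dropping the static "BTARGS=9" entry would shrink the array by one, so an extra (char *)0 element is appended to keep sizeof(__env), and therefore __nenv, unchanged. A sketch of the idiom (reading the NULL entries as spare slots for variables set at run time is my inference from the diff, not quoted from the source):

    /* __nenv counts every element, NULLs included; the trailing NULL
     * entries presumably act as free slots for environment variables
     * created while kdb is running (e.g. via its `set` command). */
    static char *__env[] = {
    	"RADIX=16",
    	"MDCOUNT=8",
    	/* ... remaining static defaults ... */
    	(char *)0,	/* spare slot */
    	(char *)0,	/* extra spare slot, offsetting the BTARGS removal */
    };
    static const int __nenv = sizeof(__env) / sizeof(char *);
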
+ 1 - 2
kernel/debug/kdb/kdb_private.h

@@ -21,7 +21,6 @@
 #define KDB_CMD_SS	(-1003)
 #define KDB_CMD_SSB	(-1004)
 #define KDB_CMD_KGDB (-1005)
-#define KDB_CMD_KGDB2 (-1006)
 
 /* Internal debug flags */
 #define KDB_DEBUG_FLAG_BP	0x0002	/* Breakpoint subsystem debug */
@@ -146,7 +145,6 @@ extern int kdb_state;
 						 * keyboard on this cpu */
 #define KDB_STATE_KEXEC		0x00040000	/* kexec issued */
 #define KDB_STATE_DOING_KGDB	0x00080000	/* kgdb enter now issued */
-#define KDB_STATE_DOING_KGDB2	0x00100000	/* kgdb enter now issued */
 #define KDB_STATE_KGDB_TRANS	0x00200000	/* Transition to kgdb */
 #define KDB_STATE_ARCH		0xff000000	/* Reserved for arch
 						 * specific use */
@@ -218,6 +216,7 @@ extern void kdb_print_nameval(const char *name, unsigned long val);
 extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
 extern void kdb_meminfo_proc_show(void);
 extern char *kdb_getstr(char *, size_t, char *);
+extern void kdb_gdb_state_pass(char *buf);
 
 /* Defines for kdb_symbol_print */
 #define KDB_SP_SPACEB	0x0001		/* Space before string */

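Note: only the uses of KDB_STATE(), KDB_STATE_SET() and KDB_STATE_CLEAR() appear in these hunks; their definitions do not. They presumably pair with the KDB_STATE_* bit defines by token pasting, roughly as sketched below (an assumption about kdb_private.h, not quoted from it):

    extern int kdb_state;

    #define KDB_STATE(flag)		(kdb_state & KDB_STATE_##flag)
    #define KDB_STATE_SET(flag)		((void)(kdb_state |= KDB_STATE_##flag))
    #define KDB_STATE_CLEAR(flag)	((void)(kdb_state &= ~KDB_STATE_##flag))

    /* So KDB_STATE(DOING_KGDB) tests the 0x00080000 bit, and deleting
     * KDB_STATE_DOING_KGDB2 retires the 0x00100000 bit outright. */
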
+ 3 - 1
mm/oom_kill.c

@@ -303,7 +303,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 	do_each_thread(g, p) {
 		unsigned int points;
 
-		if (!p->mm)
+		if (p->exit_state)
 			continue;
 		if (oom_unkillable_task(p, mem, nodemask))
 			continue;
@@ -319,6 +319,8 @@ static struct task_struct *select_bad_process(unsigned int *ppoints,
 		 */
 		if (test_tsk_thread_flag(p, TIF_MEMDIE))
 			return ERR_PTR(-1UL);
+		if (!p->mm)
+			continue;
 
 		if (p->flags & PF_EXITING) {
 			/*