Browse Source

Merge branch 'akpm' (patchbomb from Andrew)

Merge first patchbomb from Andrew Morton:
 - a few minor cifs fixes
 - dma-debug upadtes
 - ocfs2
 - slab
 - about half of MM
 - procfs
 - kernel/exit.c
 - panic.c tweaks
 - printk upates
 - lib/ updates
 - checkpatch updates
 - fs/binfmt updates
 - the drivers/rtc tree
 - nilfs
 - kmod fixes
 - more kernel/exit.c
 - various other misc tweaks and fixes

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (190 commits)
  exit: pidns: fix/update the comments in zap_pid_ns_processes()
  exit: pidns: alloc_pid() leaks pid_namespace if child_reaper is exiting
  exit: exit_notify: re-use "dead" list to autoreap current
  exit: reparent: call forget_original_parent() under tasklist_lock
  exit: reparent: avoid find_new_reaper() if no children
  exit: reparent: introduce find_alive_thread()
  exit: reparent: introduce find_child_reaper()
  exit: reparent: document the ->has_child_subreaper checks
  exit: reparent: s/while_each_thread/for_each_thread/ in find_new_reaper()
  exit: reparent: fix the cross-namespace PR_SET_CHILD_SUBREAPER reparenting
  exit: reparent: fix the dead-parent PR_SET_CHILD_SUBREAPER reparenting
  exit: proc: don't try to flush /proc/tgid/task/tgid
  exit: release_task: fix the comment about group leader accounting
  exit: wait: drop tasklist_lock before psig->c* accounting
  exit: wait: don't use zombie->real_parent
  exit: wait: cleanup the ptrace_reparented() checks
  usermodehelper: kill the kmod_thread_locker logic
  usermodehelper: don't use CLONE_VFORK for ____call_usermodehelper()
  fs/hfs/catalog.c: fix comparison bug in hfs_cat_keycmp
  nilfs2: fix the nilfs_iget() vs. nilfs_new_inode() races
  ...
Linus Torvalds 10 years ago
parent
commit
b6da0076ba
100 changed files with 2085 additions and 1586 deletions
  1. 1 1
      Documentation/cgroups/hugetlb.txt
  2. 15 11
      Documentation/cgroups/memory.txt
  3. 0 197
      Documentation/cgroups/resource_counter.txt
  4. 8 1
      Documentation/devicetree/bindings/rtc/rtc-omap.txt
  5. 1 0
      Documentation/devicetree/bindings/vendor-prefixes.txt
  6. 7 0
      Documentation/kdump/kdump.txt
  7. 3 0
      Documentation/kernel-parameters.txt
  8. 26 14
      Documentation/sysctl/kernel.txt
  9. 3 3
      MAINTAINERS
  10. 4 0
      arch/arm/boot/dts/am335x-boneblack.dts
  11. 1 1
      arch/arm/boot/dts/am33xx.dtsi
  12. 1 0
      arch/arm64/include/asm/pgtable.h
  13. 1 1
      arch/ia64/kernel/perfmon.c
  14. 1 0
      arch/powerpc/include/asm/pgtable-ppc64.h
  15. 2 2
      arch/powerpc/platforms/cell/spufs/inode.c
  16. 1 1
      arch/sh/mm/numa.c
  17. 7 0
      arch/sparc/include/asm/pgtable_64.h
  18. 13 6
      arch/tile/kernel/early_printk.c
  19. 20 25
      arch/tile/kernel/setup.c
  20. 5 0
      arch/x86/include/asm/pgtable.h
  21. 7 1
      drivers/base/Kconfig
  22. 8 0
      drivers/rtc/Kconfig
  23. 21 0
      drivers/rtc/interface.c
  24. 2 0
      drivers/rtc/rtc-ab8500.c
  25. 64 63
      drivers/rtc/rtc-ds1307.c
  26. 285 0
      drivers/rtc/rtc-ds1374.c
  27. 60 23
      drivers/rtc/rtc-isl12057.c
  28. 349 198
      drivers/rtc/rtc-omap.c
  29. 46 9
      drivers/rtc/rtc-pcf8563.c
  30. 51 15
      drivers/rtc/rtc-sirfsoc.c
  31. 36 3
      drivers/rtc/rtc-snvs.c
  32. 1 1
      drivers/usb/storage/debug.c
  33. 22 18
      fs/binfmt_elf.c
  34. 248 145
      fs/binfmt_misc.c
  35. 0 1
      fs/char_dev.c
  36. 1 1
      fs/cifs/cifsacl.c
  37. 10 10
      fs/cifs/cifssmb.c
  38. 2 2
      fs/cifs/file.c
  39. 1 1
      fs/cifs/sess.c
  40. 2 2
      fs/cifs/smb2file.c
  41. 19 19
      fs/cifs/smb2misc.c
  42. 1 1
      fs/cifs/smb2ops.c
  43. 1 1
      fs/cifs/smb2pdu.c
  44. 14 14
      fs/cifs/smb2pdu.h
  45. 1 1
      fs/file.c
  46. 8 6
      fs/hfs/catalog.c
  47. 0 1
      fs/ncpfs/ioctl.c
  48. 2 8
      fs/nilfs2/file.c
  49. 24 8
      fs/nilfs2/inode.c
  50. 12 3
      fs/nilfs2/namei.c
  51. 1 2
      fs/nilfs2/the_nilfs.c
  52. 1 1
      fs/ocfs2/aops.c
  53. 2 2
      fs/ocfs2/cluster/heartbeat.c
  54. 1 1
      fs/ocfs2/cluster/tcp.c
  55. 1 1
      fs/ocfs2/dir.c
  56. 1 1
      fs/ocfs2/dlm/dlmdomain.c
  57. 12 0
      fs/ocfs2/dlm/dlmmaster.c
  58. 13 5
      fs/ocfs2/dlm/dlmrecovery.c
  59. 31 6
      fs/ocfs2/dlmglue.c
  60. 1 3
      fs/ocfs2/file.c
  61. 1 2
      fs/ocfs2/inode.c
  62. 0 3
      fs/ocfs2/move_extents.c
  63. 6 0
      fs/ocfs2/ocfs2.h
  64. 1 1
      fs/ocfs2/slot_map.c
  65. 2 1
      fs/ocfs2/super.c
  66. 1 1
      fs/ocfs2/xattr.c
  67. 22 25
      fs/proc/array.c
  68. 3 0
      fs/proc/base.c
  69. 104 59
      fs/proc/generic.c
  70. 6 5
      fs/proc/internal.h
  71. 1 0
      fs/proc/proc_net.c
  72. 1 0
      fs/proc/root.c
  73. 68 36
      fs/proc/task_mmu.c
  74. 26 0
      include/linux/cgroup.h
  75. 6 4
      include/linux/compaction.h
  76. 0 1
      include/linux/file.h
  77. 2 2
      include/linux/gfp.h
  78. 2 1
      include/linux/hugetlb.h
  79. 0 1
      include/linux/hugetlb_cgroup.h
  80. 13 0
      include/linux/kern_levels.h
  81. 1 0
      include/linux/kernel.h
  82. 14 36
      include/linux/memcontrol.h
  83. 5 0
      include/linux/mm_types.h
  84. 0 12
      include/linux/mmzone.h
  85. 0 105
      include/linux/page_cgroup.h
  86. 51 0
      include/linux/page_counter.h
  87. 38 9
      include/linux/percpu-refcount.h
  88. 0 1
      include/linux/printk.h
  89. 1 1
      include/linux/ptrace.h
  90. 0 223
      include/linux/res_counter.h
  91. 0 4
      include/linux/slab.h
  92. 42 0
      include/linux/swap_cgroup.h
  93. 9 17
      include/net/sock.h
  94. 1 0
      include/uapi/linux/sysctl.h
  95. 29 27
      init/Kconfig
  96. 6 8
      init/main.c
  97. 0 1
      kernel/Makefile
  98. 123 122
      kernel/exit.c
  99. 5 38
      kernel/kmod.c
  100. 13 0
      kernel/panic.c

+ 1 - 1
Documentation/cgroups/hugetlb.txt

@@ -29,7 +29,7 @@ Brief summary of control files
 
  hugetlb.<hugepagesize>.limit_in_bytes     # set/show limit of "hugepagesize" hugetlb usage
  hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb  usage recorded
- hugetlb.<hugepagesize>.usage_in_bytes     # show current res_counter usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.usage_in_bytes     # show current usage for "hugepagesize" hugetlb
  hugetlb.<hugepagesize>.failcnt		   # show the number of allocation failure due to HugeTLB limit
 
 For a system supporting two hugepage size (16M and 16G) the control

+ 15 - 11
Documentation/cgroups/memory.txt

@@ -1,5 +1,10 @@
 Memory Resource Controller
 
+NOTE: This document is hopelessly outdated and it asks for a complete
+      rewrite. It still contains a useful information so we are keeping it
+      here but make sure to check the current code if you need a deeper
+      understanding.
+
 NOTE: The Memory Resource Controller has generically been referred to as the
       memory controller in this document. Do not confuse memory controller
       used here with the memory controller that is used in hardware.
@@ -52,9 +57,9 @@ Brief summary of control files.
  tasks				 # attach a task(thread) and show list of threads
  cgroup.procs			 # show list of processes
  cgroup.event_control		 # an interface for event_fd()
- memory.usage_in_bytes		 # show current res_counter usage for memory
+ memory.usage_in_bytes		 # show current usage for memory
 				 (See 5.5 for details)
- memory.memsw.usage_in_bytes	 # show current res_counter usage for memory+Swap
+ memory.memsw.usage_in_bytes	 # show current usage for memory+Swap
 				 (See 5.5 for details)
  memory.limit_in_bytes		 # set/show limit of memory usage
  memory.memsw.limit_in_bytes	 # set/show limit of memory+Swap usage
@@ -116,16 +121,16 @@ The memory controller is the first controller developed.
 
 2.1. Design
 
-The core of the design is a counter called the res_counter. The res_counter
-tracks the current memory usage and limit of the group of processes associated
-with the controller. Each cgroup has a memory controller specific data
-structure (mem_cgroup) associated with it.
+The core of the design is a counter called the page_counter. The
+page_counter tracks the current memory usage and limit of the group of
+processes associated with the controller. Each cgroup has a memory controller
+specific data structure (mem_cgroup) associated with it.
 
 2.2. Accounting
 
 		+--------------------+
-		|  mem_cgroup     |
-		|  (res_counter)     |
+		|  mem_cgroup        |
+		|  (page_counter)    |
 		+--------------------+
 		 /            ^      \
 		/             |       \
@@ -352,9 +357,8 @@ set:
 0. Configuration
 
 a. Enable CONFIG_CGROUPS
-b. Enable CONFIG_RESOURCE_COUNTERS
-c. Enable CONFIG_MEMCG
-d. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+b. Enable CONFIG_MEMCG
+c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
 d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
 
 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)

+ 0 - 197
Documentation/cgroups/resource_counter.txt

@@ -1,197 +0,0 @@
-
-		The Resource Counter
-
-The resource counter, declared at include/linux/res_counter.h,
-is supposed to facilitate the resource management by controllers
-by providing common stuff for accounting.
-
-This "stuff" includes the res_counter structure and routines
-to work with it.
-
-
-
-1. Crucial parts of the res_counter structure
-
- a. unsigned long long usage
-
- 	The usage value shows the amount of a resource that is consumed
-	by a group at a given time. The units of measurement should be
-	determined by the controller that uses this counter. E.g. it can
-	be bytes, items or any other unit the controller operates on.
-
- b. unsigned long long max_usage
-
- 	The maximal value of the usage over time.
-
- 	This value is useful when gathering statistical information about
-	the particular group, as it shows the actual resource requirements
-	for a particular group, not just some usage snapshot.
-
- c. unsigned long long limit
-
- 	The maximal allowed amount of resource to consume by the group. In
-	case the group requests for more resources, so that the usage value
-	would exceed the limit, the resource allocation is rejected (see
-	the next section).
-
- d. unsigned long long failcnt
-
- 	The failcnt stands for "failures counter". This is the number of
-	resource allocation attempts that failed.
-
- c. spinlock_t lock
-
- 	Protects changes of the above values.
-
-
-
-2. Basic accounting routines
-
- a. void res_counter_init(struct res_counter *rc,
-				struct res_counter *rc_parent)
-
- 	Initializes the resource counter. As usual, should be the first
-	routine called for a new counter.
-
-	The struct res_counter *parent can be used to define a hierarchical
-	child -> parent relationship directly in the res_counter structure,
-	NULL can be used to define no relationship.
-
- c. int res_counter_charge(struct res_counter *rc, unsigned long val,
-				struct res_counter **limit_fail_at)
-
-	When a resource is about to be allocated it has to be accounted
-	with the appropriate resource counter (controller should determine
-	which one to use on its own). This operation is called "charging".
-
-	This is not very important which operation - resource allocation
-	or charging - is performed first, but
-	  * if the allocation is performed first, this may create a
-	    temporary resource over-usage by the time resource counter is
-	    charged;
-	  * if the charging is performed first, then it should be uncharged
-	    on error path (if the one is called).
-
-	If the charging fails and a hierarchical dependency exists, the
-	limit_fail_at parameter is set to the particular res_counter element
-	where the charging failed.
-
- d. u64 res_counter_uncharge(struct res_counter *rc, unsigned long val)
-
-	When a resource is released (freed) it should be de-accounted
-	from the resource counter it was accounted to.  This is called
-	"uncharging". The return value of this function indicate the amount
-	of charges still present in the counter.
-
-	The _locked routines imply that the res_counter->lock is taken.
-
- e. u64 res_counter_uncharge_until
-		(struct res_counter *rc, struct res_counter *top,
-		 unsigned long val)
-
-	Almost same as res_counter_uncharge() but propagation of uncharge
-	stops when rc == top. This is useful when kill a res_counter in
-	child cgroup.
-
- 2.1 Other accounting routines
-
-    There are more routines that may help you with common needs, like
-    checking whether the limit is reached or resetting the max_usage
-    value. They are all declared in include/linux/res_counter.h.
-
-
-
-3. Analyzing the resource counter registrations
-
- a. If the failcnt value constantly grows, this means that the counter's
-    limit is too tight. Either the group is misbehaving and consumes too
-    many resources, or the configuration is not suitable for the group
-    and the limit should be increased.
-
- b. The max_usage value can be used to quickly tune the group. One may
-    set the limits to maximal values and either load the container with
-    a common pattern or leave one for a while. After this the max_usage
-    value shows the amount of memory the container would require during
-    its common activity.
-
-    Setting the limit a bit above this value gives a pretty good
-    configuration that works in most of the cases.
-
- c. If the max_usage is much less than the limit, but the failcnt value
-    is growing, then the group tries to allocate a big chunk of resource
-    at once.
-
- d. If the max_usage is much less than the limit, but the failcnt value
-    is 0, then this group is given too high limit, that it does not
-    require. It is better to lower the limit a bit leaving more resource
-    for other groups.
-
-
-
-4. Communication with the control groups subsystem (cgroups)
-
-All the resource controllers that are using cgroups and resource counters
-should provide files (in the cgroup filesystem) to work with the resource
-counter fields. They are recommended to adhere to the following rules:
-
- a. File names
-
- 	Field name	File name
-	---------------------------------------------------
-	usage		usage_in_<unit_of_measurement>
-	max_usage	max_usage_in_<unit_of_measurement>
-	limit		limit_in_<unit_of_measurement>
-	failcnt		failcnt
-	lock		no file :)
-
- b. Reading from file should show the corresponding field value in the
-    appropriate format.
-
- c. Writing to file
-
- 	Field		Expected behavior
-	----------------------------------
-	usage		prohibited
-	max_usage	reset to usage
-	limit		set the limit
-	failcnt		reset to zero
-
-
-
-5. Usage example
-
- a. Declare a task group (take a look at cgroups subsystem for this) and
-    fold a res_counter into it
-
-	struct my_group {
-		struct res_counter res;
-
-		<other fields>
-	}
-
- b. Put hooks in resource allocation/release paths
-
- 	int alloc_something(...)
-	{
-		if (res_counter_charge(res_counter_ptr, amount) < 0)
-			return -ENOMEM;
-
-		<allocate the resource and return to the caller>
-	}
-
-	void release_something(...)
-	{
-		res_counter_uncharge(res_counter_ptr, amount);
-
-		<release the resource>
-	}
-
-    In order to keep the usage value self-consistent, both the
-    "res_counter_ptr" and the "amount" in release_something() should be
-    the same as they were in the alloc_something() when the releasing
-    resource was allocated.
-
- c. Provide the way to read res_counter values and set them (the cgroups
-    still can help with it).
-
- c. Compile and run :)

+ 8 - 1
Documentation/devicetree/bindings/rtc/rtc-omap.txt

@@ -5,11 +5,17 @@ Required properties:
 	- "ti,da830-rtc"  - for RTC IP used similar to that on DA8xx SoC family.
 	- "ti,am3352-rtc" - for RTC IP used similar to that on AM335x SoC family.
 			    This RTC IP has special WAKE-EN Register to enable
-			    Wakeup generation for event Alarm.
+			    Wakeup generation for event Alarm. It can also be
+			    used to control an external PMIC via the
+			    pmic_power_en pin.
 - reg: Address range of rtc register set
 - interrupts: rtc timer, alarm interrupts in order
 - interrupt-parent: phandle for the interrupt controller
 
+Optional properties:
+- system-power-controller: whether the rtc is controlling the system power
+  through pmic_power_en
+
 Example:
 
 rtc@1c23000 {
@@ -18,4 +24,5 @@ rtc@1c23000 {
 	interrupts = <19
 		      19>;
 	interrupt-parent = <&intc>;
+	system-power-controller;
 };

+ 1 - 0
Documentation/devicetree/bindings/vendor-prefixes.txt

@@ -115,6 +115,7 @@ nxp	NXP Semiconductors
 onnn	ON Semiconductor Corp.
 opencores	OpenCores.org
 panasonic	Panasonic Corporation
+pericom	Pericom Technology Inc.
 phytec	PHYTEC Messtechnik GmbH
 picochip	Picochip Ltd
 plathome	Plat'Home Co., Ltd.

+ 7 - 0
Documentation/kdump/kdump.txt

@@ -471,6 +471,13 @@ format. Crash is available on Dave Anderson's site at the following URL:
 
    http://people.redhat.com/~anderson/
 
+Trigger Kdump on WARN()
+=======================
+
+The kernel parameter, panic_on_warn, calls panic() in all WARN() paths.  This
+will cause a kdump to occur at the panic() call.  In cases where a user wants
+to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
+to achieve the same behaviour.
 
 Contact
 =======

+ 3 - 0
Documentation/kernel-parameters.txt

@@ -2509,6 +2509,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			timeout < 0: reboot immediately
 			Format: <timeout>
 
+	panic_on_warn	panic() instead of WARN().  Useful to cause kdump
+			on a WARN().
+
 	crash_kexec_post_notifiers
 			Run kdump after running panic-notifiers and dumping
 			kmsg. This only for the users who doubt kdump always

+ 26 - 14
Documentation/sysctl/kernel.txt

@@ -54,8 +54,9 @@ show up in /proc/sys/kernel:
 - overflowuid
 - panic
 - panic_on_oops
-- panic_on_unrecovered_nmi
 - panic_on_stackoverflow
+- panic_on_unrecovered_nmi
+- panic_on_warn
 - pid_max
 - powersave-nap               [ PPC only ]
 - printk
@@ -527,19 +528,6 @@ the recommended setting is 60.
 
 ==============================================================
 
-panic_on_unrecovered_nmi:
-
-The default Linux behaviour on an NMI of either memory or unknown is
-to continue operation. For many environments such as scientific
-computing it is preferable that the box is taken out and the error
-dealt with than an uncorrected parity/ECC error get propagated.
-
-A small number of systems do generate NMI's for bizarre random reasons
-such as power management so the default is off. That sysctl works like
-the existing panic controls already in that directory.
-
-==============================================================
-
 panic_on_oops:
 
 Controls the kernel's behaviour when an oops or BUG is encountered.
@@ -563,6 +551,30 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
 
 ==============================================================
 
+panic_on_unrecovered_nmi:
+
+The default Linux behaviour on an NMI of either memory or unknown is
+to continue operation. For many environments such as scientific
+computing it is preferable that the box is taken out and the error
+dealt with than an uncorrected parity/ECC error get propagated.
+
+A small number of systems do generate NMI's for bizarre random reasons
+such as power management so the default is off. That sysctl works like
+the existing panic controls already in that directory.
+
+==============================================================
+
+panic_on_warn:
+
+Calls panic() in the WARN() path when set to 1.  This is useful to avoid
+a kernel rebuild when attempting to kdump at the location of a WARN().
+
+0: only WARN(), default behaviour.
+
+1: call panic() after printing out WARN() location.
+
+==============================================================
+
 perf_cpu_time_max_percent:
 
 Hints to the kernel how much CPU time it should be allowed to

+ 3 - 3
MAINTAINERS

@@ -2605,7 +2605,7 @@ L:	cgroups@vger.kernel.org
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/memcontrol.c
-F:	mm/page_cgroup.c
+F:	mm/swap_cgroup.c
 
 CORETEMP HARDWARE MONITORING DRIVER
 M:	Fenghua Yu <fenghua.yu@intel.com>
@@ -2722,7 +2722,7 @@ F:	drivers/net/wireless/cw1200/
 
 CX18 VIDEO4LINUX DRIVER
 M:	Andy Walls <awalls@md.metrocast.net>
-L:	ivtv-devel@ivtvdriver.org (moderated for non-subscribers)
+L:	ivtv-devel@ivtvdriver.org (subscribers-only)
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 W:	http://linuxtv.org
@@ -5208,7 +5208,7 @@ F:	drivers/media/tuners/it913x*
 
 IVTV VIDEO4LINUX DRIVER
 M:	Andy Walls <awalls@md.metrocast.net>
-L:	ivtv-devel@ivtvdriver.org (moderated for non-subscribers)
+L:	ivtv-devel@ivtvdriver.org (subscribers-only)
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 W:	http://www.ivtvdriver.org

+ 4 - 0
arch/arm/boot/dts/am335x-boneblack.dts

@@ -80,3 +80,7 @@
 		status = "okay";
 	};
 };
+
+&rtc {
+	system-power-controller;
+};

+ 1 - 1
arch/arm/boot/dts/am33xx.dtsi

@@ -435,7 +435,7 @@
 		};
 
 		rtc: rtc@44e3e000 {
-			compatible = "ti,da830-rtc";
+			compatible = "ti,am3352-rtc", "ti,da830-rtc";
 			reg = <0x44e3e000 0x1000>;
 			interrupts = <75
 				      76>;

+ 1 - 0
arch/arm64/include/asm/pgtable.h

@@ -279,6 +279,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
 #define pmd_mksplitting(pmd)	pte_pmd(pte_mkspecial(pmd_pte(pmd)))

+ 1 - 1
arch/ia64/kernel/perfmon.c

@@ -2662,7 +2662,7 @@ pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *reg
 
 	ret = -ENOMEM;
 
-	fd = get_unused_fd();
+	fd = get_unused_fd_flags(0);
 	if (fd < 0)
 		return fd;
 

+ 1 - 0
arch/powerpc/include/asm/pgtable-ppc64.h

@@ -467,6 +467,7 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 }
 
 #define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
 #define pmd_young(pmd)		pte_young(pmd_pte(pmd))
 #define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
 #define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))

+ 2 - 2
arch/powerpc/platforms/cell/spufs/inode.c

@@ -301,7 +301,7 @@ static int spufs_context_open(struct path *path)
 	int ret;
 	struct file *filp;
 
-	ret = get_unused_fd();
+	ret = get_unused_fd_flags(0);
 	if (ret < 0)
 		return ret;
 
@@ -518,7 +518,7 @@ static int spufs_gang_open(struct path *path)
 	int ret;
 	struct file *filp;
 
-	ret = get_unused_fd();
+	ret = get_unused_fd_flags(0);
 	if (ret < 0)
 		return ret;
 

+ 1 - 1
arch/sh/mm/numa.c

@@ -31,7 +31,7 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
 	unsigned long bootmem_paddr;
 
 	/* Don't allow bogus node assignment */
-	BUG_ON(nid > MAX_NUMNODES || nid <= 0);
+	BUG_ON(nid >= MAX_NUMNODES || nid <= 0);
 
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = end >> PAGE_SHIFT;

+ 7 - 0
arch/sparc/include/asm/pgtable_64.h

@@ -667,6 +667,13 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline unsigned long pmd_dirty(pmd_t pmd)
+{
+	pte_t pte = __pte(pmd_val(pmd));
+
+	return pte_dirty(pte);
+}
+
 static inline unsigned long pmd_young(pmd_t pmd)
 {
 	pte_t pte = __pte(pmd_val(pmd));

+ 13 - 6
arch/tile/kernel/early_printk.c

@@ -43,13 +43,20 @@ static struct console early_hv_console = {
 
 void early_panic(const char *fmt, ...)
 {
-	va_list ap;
+	struct va_format vaf;
+	va_list args;
+
 	arch_local_irq_disable_all();
-	va_start(ap, fmt);
-	early_printk("Kernel panic - not syncing: ");
-	early_vprintk(fmt, ap);
-	early_printk("\n");
-	va_end(ap);
+
+	va_start(args, fmt);
+
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	early_printk("Kernel panic - not syncing: %pV", &vaf);
+
+	va_end(args);
+
 	dump_stack();
 	hv_halt();
 }

+ 20 - 25
arch/tile/kernel/setup.c

@@ -534,11 +534,10 @@ static void __init setup_memory(void)
 			}
 		}
 		physpages -= dropped_pages;
-		pr_warning("Only using %ldMB memory;"
-		       " ignoring %ldMB.\n",
-		       physpages >> (20 - PAGE_SHIFT),
-		       dropped_pages >> (20 - PAGE_SHIFT));
-		pr_warning("Consider using a larger page size.\n");
+		pr_warn("Only using %ldMB memory - ignoring %ldMB\n",
+			physpages >> (20 - PAGE_SHIFT),
+			dropped_pages >> (20 - PAGE_SHIFT));
+		pr_warn("Consider using a larger page size\n");
 	}
 #endif
 
@@ -566,9 +565,8 @@ static void __init setup_memory(void)
 
 #ifndef __tilegx__
 	if (node_end_pfn[0] > MAXMEM_PFN) {
-		pr_warning("Only using %ldMB LOWMEM.\n",
-		       MAXMEM>>20);
-		pr_warning("Use a HIGHMEM enabled kernel.\n");
+		pr_warn("Only using %ldMB LOWMEM\n", MAXMEM >> 20);
+		pr_warn("Use a HIGHMEM enabled kernel\n");
 		max_low_pfn = MAXMEM_PFN;
 		max_pfn = MAXMEM_PFN;
 		node_end_pfn[0] = MAXMEM_PFN;
@@ -1112,8 +1110,8 @@ static void __init load_hv_initrd(void)
 	fd = hv_fs_findfile((HV_VirtAddr) initramfs_file);
 	if (fd == HV_ENOENT) {
 		if (set_initramfs_file) {
-			pr_warning("No such hvfs initramfs file '%s'\n",
-				   initramfs_file);
+			pr_warn("No such hvfs initramfs file '%s'\n",
+				initramfs_file);
 			return;
 		} else {
 			/* Try old backwards-compatible name. */
@@ -1126,8 +1124,8 @@ static void __init load_hv_initrd(void)
 	stat = hv_fs_fstat(fd);
 	BUG_ON(stat.size < 0);
 	if (stat.flags & HV_FS_ISDIR) {
-		pr_warning("Ignoring hvfs file '%s': it's a directory.\n",
-			   initramfs_file);
+		pr_warn("Ignoring hvfs file '%s': it's a directory\n",
+			initramfs_file);
 		return;
 	}
 	initrd = alloc_bootmem_pages(stat.size);
@@ -1185,9 +1183,8 @@ static void __init validate_hv(void)
 	HV_Topology topology = hv_inquire_topology();
 	BUG_ON(topology.coord.x != 0 || topology.coord.y != 0);
 	if (topology.width != 1 || topology.height != 1) {
-		pr_warning("Warning: booting UP kernel on %dx%d grid;"
-			   " will ignore all but first tile.\n",
-			   topology.width, topology.height);
+		pr_warn("Warning: booting UP kernel on %dx%d grid; will ignore all but first tile\n",
+			topology.width, topology.height);
 	}
 #endif
 
@@ -1208,9 +1205,8 @@ static void __init validate_hv(void)
 	 * We use a struct cpumask for this, so it must be big enough.
 	 */
 	if ((smp_height * smp_width) > nr_cpu_ids)
-		early_panic("Hypervisor %d x %d grid too big for Linux"
-			    " NR_CPUS %d\n", smp_height, smp_width,
-			    nr_cpu_ids);
+		early_panic("Hypervisor %d x %d grid too big for Linux NR_CPUS %d\n",
+			    smp_height, smp_width, nr_cpu_ids);
 #endif
 
 	/*
@@ -1265,10 +1261,9 @@ static void __init validate_va(void)
 
 	/* Kernel PCs must have their high bit set; see intvec.S. */
 	if ((long)VMALLOC_START >= 0)
-		early_panic(
-			"Linux VMALLOC region below the 2GB line (%#lx)!\n"
-			"Reconfigure the kernel with smaller VMALLOC_RESERVE.\n",
-			VMALLOC_START);
+		early_panic("Linux VMALLOC region below the 2GB line (%#lx)!\n"
+			    "Reconfigure the kernel with smaller VMALLOC_RESERVE\n",
+			    VMALLOC_START);
 #endif
 }
 
@@ -1395,7 +1390,7 @@ static void __init setup_cpu_maps(void)
 
 static int __init dataplane(char *str)
 {
-	pr_warning("WARNING: dataplane support disabled in this kernel\n");
+	pr_warn("WARNING: dataplane support disabled in this kernel\n");
 	return 0;
 }
 
@@ -1413,8 +1408,8 @@ void __init setup_arch(char **cmdline_p)
 	len = hv_get_command_line((HV_VirtAddr) boot_command_line,
 				  COMMAND_LINE_SIZE);
 	if (boot_command_line[0])
-		pr_warning("WARNING: ignoring dynamic command line \"%s\"\n",
-			   boot_command_line);
+		pr_warn("WARNING: ignoring dynamic command line \"%s\"\n",
+			boot_command_line);
 	strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
 #else
 	char *hv_cmdline;

+ 5 - 0
arch/x86/include/asm/pgtable.h

@@ -100,6 +100,11 @@ static inline int pte_young(pte_t pte)
 	return pte_flags(pte) & _PAGE_ACCESSED;
 }
 
+static inline int pmd_dirty(pmd_t pmd)
+{
+	return pmd_flags(pmd) & _PAGE_DIRTY;
+}
+
 static inline int pmd_young(pmd_t pmd)
 {
 	return pmd_flags(pmd) & _PAGE_ACCESSED;

+ 7 - 1
drivers/base/Kconfig

@@ -267,18 +267,24 @@ comment "Default contiguous memory area size:"
 config CMA_SIZE_MBYTES
 	int "Size in Mega Bytes"
 	depends on !CMA_SIZE_SEL_PERCENTAGE
+	default 0 if X86
 	default 16
 	help
 	  Defines the size (in MiB) of the default memory area for Contiguous
-	  Memory Allocator.
+	  Memory Allocator.  If the size of 0 is selected, CMA is disabled by
+	  default, but it can be enabled by passing cma=size[MG] to the kernel.
+
 
 config CMA_SIZE_PERCENTAGE
 	int "Percentage of total memory"
 	depends on !CMA_SIZE_SEL_MBYTES
+	default 0 if X86
 	default 10
 	help
 	  Defines the size of the default memory area for Contiguous Memory
 	  Allocator as a percentage of the total memory in the system.
+	  If 0 percent is selected, CMA is disabled by default, but it can be
+	  enabled by passing cma=size[MG] to the kernel.
 
 choice
 	prompt "Selected region size"

+ 8 - 0
drivers/rtc/Kconfig

@@ -192,6 +192,14 @@ config RTC_DRV_DS1374
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-ds1374.
 
+config RTC_DRV_DS1374_WDT
+	bool "Dallas/Maxim DS1374 watchdog timer"
+	depends on RTC_DRV_DS1374
+	help
+	  If you say Y here you will get support for the
+	  watchdog timer in the Dallas Semiconductor DS1374
+	  real-time clock chips.
+
 config RTC_DRV_DS1672
 	tristate "Dallas/Maxim DS1672"
 	help

+ 21 - 0
drivers/rtc/interface.c

@@ -30,6 +30,14 @@ static int __rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm)
 	else {
 		memset(tm, 0, sizeof(struct rtc_time));
 		err = rtc->ops->read_time(rtc->dev.parent, tm);
+		if (err < 0) {
+			dev_err(&rtc->dev, "read_time: fail to read\n");
+			return err;
+		}
+
+		err = rtc_valid_tm(tm);
+		if (err < 0)
+			dev_err(&rtc->dev, "read_time: rtc_time isn't valid\n");
 	}
 	return err;
 }
@@ -891,11 +899,24 @@ again:
 	if (next) {
 		struct rtc_wkalrm alarm;
 		int err;
+		int retry = 3;
+
 		alarm.time = rtc_ktime_to_tm(next->expires);
 		alarm.enabled = 1;
+reprogram:
 		err = __rtc_set_alarm(rtc, &alarm);
 		if (err == -ETIME)
 			goto again;
+		else if (err) {
+			if (retry-- > 0)
+				goto reprogram;
+
+			timer = container_of(next, struct rtc_timer, node);
+			timerqueue_del(&rtc->timerqueue, &timer->node);
+			timer->enabled = 0;
+			dev_err(&rtc->dev, "__rtc_set_alarm: err=%d\n", err);
+			goto again;
+		}
 	} else
 		rtc_alarm_disable(rtc);
 

+ 2 - 0
drivers/rtc/rtc-ab8500.c

@@ -504,6 +504,8 @@ static int ab8500_rtc_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	rtc->uie_unsupported = 1;
+
 	return 0;
 }
 

+ 64 - 63
drivers/rtc/rtc-ds1307.c

@@ -35,7 +35,7 @@ enum ds_type {
 	ds_1388,
 	ds_3231,
 	m41t00,
-	mcp7941x,
+	mcp794xx,
 	rx_8025,
 	last_ds_type /* always last */
 	/* rs5c372 too?  different address... */
@@ -46,7 +46,7 @@ enum ds_type {
 #define DS1307_REG_SECS		0x00	/* 00-59 */
 #	define DS1307_BIT_CH		0x80
 #	define DS1340_BIT_nEOSC		0x80
-#	define MCP7941X_BIT_ST		0x80
+#	define MCP794XX_BIT_ST		0x80
 #define DS1307_REG_MIN		0x01	/* 00-59 */
 #define DS1307_REG_HOUR		0x02	/* 00-23, or 1-12{am,pm} */
 #	define DS1307_BIT_12HR		0x40	/* in REG_HOUR */
@@ -54,7 +54,7 @@ enum ds_type {
 #	define DS1340_BIT_CENTURY_EN	0x80	/* in REG_HOUR */
 #	define DS1340_BIT_CENTURY	0x40	/* in REG_HOUR */
 #define DS1307_REG_WDAY		0x03	/* 01-07 */
-#	define MCP7941X_BIT_VBATEN	0x08
+#	define MCP794XX_BIT_VBATEN	0x08
 #define DS1307_REG_MDAY		0x04	/* 01-31 */
 #define DS1307_REG_MONTH	0x05	/* 01-12 */
 #	define DS1337_BIT_CENTURY	0x80	/* in REG_MONTH */
@@ -159,7 +159,7 @@ static struct chip_desc chips[last_ds_type] = {
 	[ds_3231] = {
 		.alarm		= 1,
 	},
-	[mcp7941x] = {
+	[mcp794xx] = {
 		.alarm		= 1,
 		/* this is battery backed SRAM */
 		.nvram_offset	= 0x20,
@@ -176,7 +176,8 @@ static const struct i2c_device_id ds1307_id[] = {
 	{ "ds1340", ds_1340 },
 	{ "ds3231", ds_3231 },
 	{ "m41t00", m41t00 },
-	{ "mcp7941x", mcp7941x },
+	{ "mcp7940x", mcp794xx },
+	{ "mcp7941x", mcp794xx },
 	{ "pt7c4338", ds_1307 },
 	{ "rx8025", rx_8025 },
 	{ }
@@ -439,14 +440,14 @@ static int ds1307_set_time(struct device *dev, struct rtc_time *t)
 		buf[DS1307_REG_HOUR] |= DS1340_BIT_CENTURY_EN
 				| DS1340_BIT_CENTURY;
 		break;
-	case mcp7941x:
+	case mcp794xx:
 		/*
 		 * these bits were cleared when preparing the date/time
 		 * values and need to be set again before writing the
 		 * buffer out to the device.
 		 */
-		buf[DS1307_REG_SECS] |= MCP7941X_BIT_ST;
-		buf[DS1307_REG_WDAY] |= MCP7941X_BIT_VBATEN;
+		buf[DS1307_REG_SECS] |= MCP794XX_BIT_ST;
+		buf[DS1307_REG_WDAY] |= MCP794XX_BIT_VBATEN;
 		break;
 	default:
 		break;
@@ -614,26 +615,26 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 /*----------------------------------------------------------------------*/
 
 /*
- * Alarm support for mcp7941x devices.
+ * Alarm support for mcp794xx devices.
  */
 
-#define MCP7941X_REG_CONTROL		0x07
-#	define MCP7941X_BIT_ALM0_EN	0x10
-#	define MCP7941X_BIT_ALM1_EN	0x20
-#define MCP7941X_REG_ALARM0_BASE	0x0a
-#define MCP7941X_REG_ALARM0_CTRL	0x0d
-#define MCP7941X_REG_ALARM1_BASE	0x11
-#define MCP7941X_REG_ALARM1_CTRL	0x14
-#	define MCP7941X_BIT_ALMX_IF	(1 << 3)
-#	define MCP7941X_BIT_ALMX_C0	(1 << 4)
-#	define MCP7941X_BIT_ALMX_C1	(1 << 5)
-#	define MCP7941X_BIT_ALMX_C2	(1 << 6)
-#	define MCP7941X_BIT_ALMX_POL	(1 << 7)
-#	define MCP7941X_MSK_ALMX_MATCH	(MCP7941X_BIT_ALMX_C0 | \
-					 MCP7941X_BIT_ALMX_C1 | \
-					 MCP7941X_BIT_ALMX_C2)
-
-static void mcp7941x_work(struct work_struct *work)
+#define MCP794XX_REG_CONTROL		0x07
+#	define MCP794XX_BIT_ALM0_EN	0x10
+#	define MCP794XX_BIT_ALM1_EN	0x20
+#define MCP794XX_REG_ALARM0_BASE	0x0a
+#define MCP794XX_REG_ALARM0_CTRL	0x0d
+#define MCP794XX_REG_ALARM1_BASE	0x11
+#define MCP794XX_REG_ALARM1_CTRL	0x14
+#	define MCP794XX_BIT_ALMX_IF	(1 << 3)
+#	define MCP794XX_BIT_ALMX_C0	(1 << 4)
+#	define MCP794XX_BIT_ALMX_C1	(1 << 5)
+#	define MCP794XX_BIT_ALMX_C2	(1 << 6)
+#	define MCP794XX_BIT_ALMX_POL	(1 << 7)
+#	define MCP794XX_MSK_ALMX_MATCH	(MCP794XX_BIT_ALMX_C0 | \
+					 MCP794XX_BIT_ALMX_C1 | \
+					 MCP794XX_BIT_ALMX_C2)
+
+static void mcp794xx_work(struct work_struct *work)
 {
 	struct ds1307 *ds1307 = container_of(work, struct ds1307, work);
 	struct i2c_client *client = ds1307->client;
@@ -642,22 +643,22 @@ static void mcp7941x_work(struct work_struct *work)
 	mutex_lock(&ds1307->rtc->ops_lock);
 
 	/* Check and clear alarm 0 interrupt flag. */
-	reg = i2c_smbus_read_byte_data(client, MCP7941X_REG_ALARM0_CTRL);
+	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_ALARM0_CTRL);
 	if (reg < 0)
 		goto out;
-	if (!(reg & MCP7941X_BIT_ALMX_IF))
+	if (!(reg & MCP794XX_BIT_ALMX_IF))
 		goto out;
-	reg &= ~MCP7941X_BIT_ALMX_IF;
-	ret = i2c_smbus_write_byte_data(client, MCP7941X_REG_ALARM0_CTRL, reg);
+	reg &= ~MCP794XX_BIT_ALMX_IF;
+	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_ALARM0_CTRL, reg);
 	if (ret < 0)
 		goto out;
 
 	/* Disable alarm 0. */
-	reg = i2c_smbus_read_byte_data(client, MCP7941X_REG_CONTROL);
+	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
 	if (reg < 0)
 		goto out;
-	reg &= ~MCP7941X_BIT_ALM0_EN;
-	ret = i2c_smbus_write_byte_data(client, MCP7941X_REG_CONTROL, reg);
+	reg &= ~MCP794XX_BIT_ALM0_EN;
+	ret = i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
 	if (ret < 0)
 		goto out;
 
@@ -669,7 +670,7 @@ out:
 	mutex_unlock(&ds1307->rtc->ops_lock);
 }
 
-static int mcp7941x_read_alarm(struct device *dev, struct rtc_wkalrm *t)
+static int mcp794xx_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct ds1307 *ds1307 = i2c_get_clientdata(client);
@@ -680,11 +681,11 @@ static int mcp7941x_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 		return -EINVAL;
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP7941X_REG_CONTROL, 10, regs);
+	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
 	if (ret < 0)
 		return ret;
 
-	t->enabled = !!(regs[0] & MCP7941X_BIT_ALM0_EN);
+	t->enabled = !!(regs[0] & MCP794XX_BIT_ALM0_EN);
 
 	/* Report alarm 0 time assuming 24-hour and day-of-month modes. */
 	t->time.tm_sec = bcd2bin(ds1307->regs[3] & 0x7f);
@@ -701,14 +702,14 @@ static int mcp7941x_read_alarm(struct device *dev, struct rtc_wkalrm *t)
 		"enabled=%d polarity=%d irq=%d match=%d\n", __func__,
 		t->time.tm_sec, t->time.tm_min, t->time.tm_hour,
 		t->time.tm_wday, t->time.tm_mday, t->time.tm_mon, t->enabled,
-		!!(ds1307->regs[6] & MCP7941X_BIT_ALMX_POL),
-		!!(ds1307->regs[6] & MCP7941X_BIT_ALMX_IF),
-		(ds1307->regs[6] & MCP7941X_MSK_ALMX_MATCH) >> 4);
+		!!(ds1307->regs[6] & MCP794XX_BIT_ALMX_POL),
+		!!(ds1307->regs[6] & MCP794XX_BIT_ALMX_IF),
+		(ds1307->regs[6] & MCP794XX_MSK_ALMX_MATCH) >> 4);
 
 	return 0;
 }
 
-static int mcp7941x_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+static int mcp794xx_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct ds1307 *ds1307 = i2c_get_clientdata(client);
@@ -725,7 +726,7 @@ static int mcp7941x_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 		t->enabled, t->pending);
 
 	/* Read control and alarm 0 registers. */
-	ret = ds1307->read_block_data(client, MCP7941X_REG_CONTROL, 10, regs);
+	ret = ds1307->read_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
 	if (ret < 0)
 		return ret;
 
@@ -738,23 +739,23 @@ static int mcp7941x_set_alarm(struct device *dev, struct rtc_wkalrm *t)
 	regs[8] = bin2bcd(t->time.tm_mon) + 1;
 
 	/* Clear the alarm 0 interrupt flag. */
-	regs[6] &= ~MCP7941X_BIT_ALMX_IF;
+	regs[6] &= ~MCP794XX_BIT_ALMX_IF;
 	/* Set alarm match: second, minute, hour, day, date, month. */
-	regs[6] |= MCP7941X_MSK_ALMX_MATCH;
+	regs[6] |= MCP794XX_MSK_ALMX_MATCH;
 
 	if (t->enabled)
-		regs[0] |= MCP7941X_BIT_ALM0_EN;
+		regs[0] |= MCP794XX_BIT_ALM0_EN;
 	else
-		regs[0] &= ~MCP7941X_BIT_ALM0_EN;
+		regs[0] &= ~MCP794XX_BIT_ALM0_EN;
 
-	ret = ds1307->write_block_data(client, MCP7941X_REG_CONTROL, 10, regs);
+	ret = ds1307->write_block_data(client, MCP794XX_REG_CONTROL, 10, regs);
 	if (ret < 0)
 		return ret;
 
 	return 0;
 }
 
-static int mcp7941x_alarm_irq_enable(struct device *dev, unsigned int enabled)
+static int mcp794xx_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct ds1307 *ds1307 = i2c_get_clientdata(client);
@@ -763,24 +764,24 @@ static int mcp7941x_alarm_irq_enable(struct device *dev, unsigned int enabled)
 	if (!test_bit(HAS_ALARM, &ds1307->flags))
 		return -EINVAL;
 
-	reg = i2c_smbus_read_byte_data(client, MCP7941X_REG_CONTROL);
+	reg = i2c_smbus_read_byte_data(client, MCP794XX_REG_CONTROL);
 	if (reg < 0)
 		return reg;
 
 	if (enabled)
-		reg |= MCP7941X_BIT_ALM0_EN;
+		reg |= MCP794XX_BIT_ALM0_EN;
 	else
-		reg &= ~MCP7941X_BIT_ALM0_EN;
+		reg &= ~MCP794XX_BIT_ALM0_EN;
 
-	return i2c_smbus_write_byte_data(client, MCP7941X_REG_CONTROL, reg);
+	return i2c_smbus_write_byte_data(client, MCP794XX_REG_CONTROL, reg);
 }
 
-static const struct rtc_class_ops mcp7941x_rtc_ops = {
+static const struct rtc_class_ops mcp794xx_rtc_ops = {
 	.read_time	= ds1307_get_time,
 	.set_time	= ds1307_set_time,
-	.read_alarm	= mcp7941x_read_alarm,
-	.set_alarm	= mcp7941x_set_alarm,
-	.alarm_irq_enable = mcp7941x_alarm_irq_enable,
+	.read_alarm	= mcp794xx_read_alarm,
+	.set_alarm	= mcp794xx_set_alarm,
+	.alarm_irq_enable = mcp794xx_alarm_irq_enable,
 };
 
 /*----------------------------------------------------------------------*/
@@ -1049,10 +1050,10 @@ static int ds1307_probe(struct i2c_client *client,
 	case ds_1388:
 		ds1307->offset = 1; /* Seconds starts at 1 */
 		break;
-	case mcp7941x:
-		rtc_ops = &mcp7941x_rtc_ops;
+	case mcp794xx:
+		rtc_ops = &mcp794xx_rtc_ops;
 		if (ds1307->client->irq > 0 && chip->alarm) {
-			INIT_WORK(&ds1307->work, mcp7941x_work);
+			INIT_WORK(&ds1307->work, mcp794xx_work);
 			want_irq = true;
 		}
 		break;
@@ -1117,18 +1118,18 @@ read_rtc:
 			dev_warn(&client->dev, "SET TIME!\n");
 		}
 		break;
-	case mcp7941x:
+	case mcp794xx:
 		/* make sure that the backup battery is enabled */
-		if (!(ds1307->regs[DS1307_REG_WDAY] & MCP7941X_BIT_VBATEN)) {
+		if (!(ds1307->regs[DS1307_REG_WDAY] & MCP794XX_BIT_VBATEN)) {
 			i2c_smbus_write_byte_data(client, DS1307_REG_WDAY,
 					ds1307->regs[DS1307_REG_WDAY]
-					| MCP7941X_BIT_VBATEN);
+					| MCP794XX_BIT_VBATEN);
 		}
 
 		/* clock halted?  turn it on, so clock can tick. */
-		if (!(tmp & MCP7941X_BIT_ST)) {
+		if (!(tmp & MCP794XX_BIT_ST)) {
 			i2c_smbus_write_byte_data(client, DS1307_REG_SECS,
-					MCP7941X_BIT_ST);
+					MCP794XX_BIT_ST);
 			dev_warn(&client->dev, "SET TIME!\n");
 			goto read_rtc;
 		}

+ 285 - 0
drivers/rtc/rtc-ds1374.c

@@ -4,6 +4,7 @@
  * Based on code by Randy Vinson <rvinson@mvista.com>,
  * which was based on the m41t00.c by Mark Greer <mgreer@mvista.com>.
  *
+ * Copyright (C) 2014 Rose Technology
  * Copyright (C) 2006-2007 Freescale Semiconductor
  *
  * 2005 (c) MontaVista Software, Inc. This file is licensed under
@@ -26,6 +27,13 @@
 #include <linux/workqueue.h>
 #include <linux/slab.h>
 #include <linux/pm.h>
+#ifdef CONFIG_RTC_DRV_DS1374_WDT
+#include <linux/fs.h>
+#include <linux/ioctl.h>
+#include <linux/miscdevice.h>
+#include <linux/reboot.h>
+#include <linux/watchdog.h>
+#endif
 
 #define DS1374_REG_TOD0		0x00 /* Time of Day */
 #define DS1374_REG_TOD1		0x01
@@ -49,6 +57,14 @@ static const struct i2c_device_id ds1374_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, ds1374_id);
 
+#ifdef CONFIG_OF
+static const struct of_device_id ds1374_of_match[] = {
+	{ .compatible = "dallas,ds1374" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ds1374_of_match);
+#endif
+
 struct ds1374 {
 	struct i2c_client *client;
 	struct rtc_device *rtc;
@@ -162,6 +178,7 @@ static int ds1374_set_time(struct device *dev, struct rtc_time *time)
 	return ds1374_write_rtc(client, itime, DS1374_REG_TOD0, 4);
 }
 
+#ifndef CONFIG_RTC_DRV_DS1374_WDT
 /* The ds1374 has a decrementer for an alarm, rather than a comparator.
  * If the time of day is changed, then the alarm will need to be
  * reset.
@@ -263,6 +280,7 @@ out:
 	mutex_unlock(&ds1374->mutex);
 	return ret;
 }
+#endif
 
 static irqreturn_t ds1374_irq(int irq, void *dev_id)
 {
@@ -307,6 +325,7 @@ unlock:
 	mutex_unlock(&ds1374->mutex);
 }
 
+#ifndef CONFIG_RTC_DRV_DS1374_WDT
 static int ds1374_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
 	struct i2c_client *client = to_i2c_client(dev);
@@ -331,15 +350,260 @@ out:
 	mutex_unlock(&ds1374->mutex);
 	return ret;
 }
+#endif
 
 static const struct rtc_class_ops ds1374_rtc_ops = {
 	.read_time = ds1374_read_time,
 	.set_time = ds1374_set_time,
+#ifndef CONFIG_RTC_DRV_DS1374_WDT
 	.read_alarm = ds1374_read_alarm,
 	.set_alarm = ds1374_set_alarm,
 	.alarm_irq_enable = ds1374_alarm_irq_enable,
+#endif
+};
+
+#ifdef CONFIG_RTC_DRV_DS1374_WDT
+/*
+ *****************************************************************************
+ *
+ * Watchdog Driver
+ *
+ *****************************************************************************
+ */
+static struct i2c_client *save_client;
+/* Default margin */
+#define WD_TIMO 131762
+
+#define DRV_NAME "DS1374 Watchdog"
+
+static int wdt_margin = WD_TIMO;
+static unsigned long wdt_is_open;
+module_param(wdt_margin, int, 0);
+MODULE_PARM_DESC(wdt_margin, "Watchdog timeout in seconds (default 32s)");
+
+static const struct watchdog_info ds1374_wdt_info = {
+	.identity       = "DS1374 WTD",
+	.options        = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING |
+						WDIOF_MAGICCLOSE,
 };
 
+static int ds1374_wdt_settimeout(unsigned int timeout)
+{
+	int ret = -ENOIOCTLCMD;
+	int cr;
+
+	ret = cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR);
+	if (ret < 0)
+		goto out;
+
+	/* Disable any existing watchdog/alarm before setting the new one */
+	cr &= ~DS1374_REG_CR_WACE;
+
+	ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr);
+	if (ret < 0)
+		goto out;
+
+	/* Set new watchdog time */
+	ret = ds1374_write_rtc(save_client, timeout, DS1374_REG_WDALM0, 3);
+	if (ret) {
+		pr_info("rtc-ds1374 - couldn't set new watchdog time\n");
+		goto out;
+	}
+
+	/* Enable watchdog timer */
+	cr |= DS1374_REG_CR_WACE | DS1374_REG_CR_WDALM;
+	cr &= ~DS1374_REG_CR_AIE;
+
+	ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr);
+	if (ret < 0)
+		goto out;
+
+	return 0;
+out:
+	return ret;
+}
+
+
+/*
+ * Reload the watchdog timer.  (ie, pat the watchdog)
+ */
+static void ds1374_wdt_ping(void)
+{
+	u32 val;
+	int ret = 0;
+
+	ret = ds1374_read_rtc(save_client, &val, DS1374_REG_WDALM0, 3);
+	if (ret)
+		pr_info("WD TICK FAIL!!!!!!!!!! %i\n", ret);
+}
+
+static void ds1374_wdt_disable(void)
+{
+	int ret = -ENOIOCTLCMD;
+	int cr;
+
+	cr = i2c_smbus_read_byte_data(save_client, DS1374_REG_CR);
+	/* Disable watchdog timer */
+	cr &= ~DS1374_REG_CR_WACE;
+
+	ret = i2c_smbus_write_byte_data(save_client, DS1374_REG_CR, cr);
+}
+
+/*
+ * Watchdog device is opened, and watchdog starts running.
+ */
+static int ds1374_wdt_open(struct inode *inode, struct file *file)
+{
+	struct ds1374 *ds1374 = i2c_get_clientdata(save_client);
+
+	if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) {
+		mutex_lock(&ds1374->mutex);
+		if (test_and_set_bit(0, &wdt_is_open)) {
+			mutex_unlock(&ds1374->mutex);
+			return -EBUSY;
+		}
+		/*
+		 *      Activate
+		 */
+		wdt_is_open = 1;
+		mutex_unlock(&ds1374->mutex);
+		return nonseekable_open(inode, file);
+	}
+	return -ENODEV;
+}
+
+/*
+ * Close the watchdog device.
+ */
+static int ds1374_wdt_release(struct inode *inode, struct file *file)
+{
+	if (MINOR(inode->i_rdev) == WATCHDOG_MINOR)
+		clear_bit(0, &wdt_is_open);
+
+	return 0;
+}
+
+/*
+ * Pat the watchdog whenever device is written to.
+ */
+static ssize_t ds1374_wdt_write(struct file *file, const char __user *data,
+				size_t len, loff_t *ppos)
+{
+	if (len) {
+		ds1374_wdt_ping();
+		return 1;
+	}
+	return 0;
+}
+
+static ssize_t ds1374_wdt_read(struct file *file, char __user *data,
+				size_t len, loff_t *ppos)
+{
+	return 0;
+}
+
+/*
+ * Handle commands from user-space.
+ */
+static long ds1374_wdt_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
+{
+	int new_margin, options;
+
+	switch (cmd) {
+	case WDIOC_GETSUPPORT:
+		return copy_to_user((struct watchdog_info __user *)arg,
+		&ds1374_wdt_info, sizeof(ds1374_wdt_info)) ? -EFAULT : 0;
+
+	case WDIOC_GETSTATUS:
+	case WDIOC_GETBOOTSTATUS:
+		return put_user(0, (int __user *)arg);
+	case WDIOC_KEEPALIVE:
+		ds1374_wdt_ping();
+		return 0;
+	case WDIOC_SETTIMEOUT:
+		if (get_user(new_margin, (int __user *)arg))
+			return -EFAULT;
+
+		if (new_margin < 1 || new_margin > 16777216)
+			return -EINVAL;
+
+		wdt_margin = new_margin;
+		ds1374_wdt_settimeout(new_margin);
+		ds1374_wdt_ping();
+		/* fallthrough */
+	case WDIOC_GETTIMEOUT:
+		return put_user(wdt_margin, (int __user *)arg);
+	case WDIOC_SETOPTIONS:
+		if (copy_from_user(&options, (int __user *)arg, sizeof(int)))
+			return -EFAULT;
+
+		if (options & WDIOS_DISABLECARD) {
+			pr_info("rtc-ds1374: disable watchdog\n");
+			ds1374_wdt_disable();
+		}
+
+		if (options & WDIOS_ENABLECARD) {
+			pr_info("rtc-ds1374: enable watchdog\n");
+			ds1374_wdt_settimeout(wdt_margin);
+			ds1374_wdt_ping();
+		}
+
+		return -EINVAL;
+	}
+	return -ENOTTY;
+}
+
+static long ds1374_wdt_unlocked_ioctl(struct file *file, unsigned int cmd,
+			unsigned long arg)
+{
+	int ret;
+	struct ds1374 *ds1374 = i2c_get_clientdata(save_client);
+
+	mutex_lock(&ds1374->mutex);
+	ret = ds1374_wdt_ioctl(file, cmd, arg);
+	mutex_unlock(&ds1374->mutex);
+
+	return ret;
+}
+
+static int ds1374_wdt_notify_sys(struct notifier_block *this,
+			unsigned long code, void *unused)
+{
+	if (code == SYS_DOWN || code == SYS_HALT)
+		/* Disable Watchdog */
+		ds1374_wdt_disable();
+	return NOTIFY_DONE;
+}
+
+static const struct file_operations ds1374_wdt_fops = {
+	.owner			= THIS_MODULE,
+	.read			= ds1374_wdt_read,
+	.unlocked_ioctl		= ds1374_wdt_unlocked_ioctl,
+	.write			= ds1374_wdt_write,
+	.open                   = ds1374_wdt_open,
+	.release                = ds1374_wdt_release,
+	.llseek			= no_llseek,
+};
+
+static struct miscdevice ds1374_miscdev = {
+	.minor          = WATCHDOG_MINOR,
+	.name           = "watchdog",
+	.fops           = &ds1374_wdt_fops,
+};
+
+static struct notifier_block ds1374_wdt_notifier = {
+	.notifier_call = ds1374_wdt_notify_sys,
+};
+
+#endif /*CONFIG_RTC_DRV_DS1374_WDT*/
+/*
+ *****************************************************************************
+ *
+ *	Driver Interface
+ *
+ *****************************************************************************
+ */
 static int ds1374_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
@@ -378,12 +642,33 @@ static int ds1374_probe(struct i2c_client *client,
 		return PTR_ERR(ds1374->rtc);
 	}
 
+#ifdef CONFIG_RTC_DRV_DS1374_WDT
+	save_client = client;
+	ret = misc_register(&ds1374_miscdev);
+	if (ret)
+		return ret;
+	ret = register_reboot_notifier(&ds1374_wdt_notifier);
+	if (ret) {
+		misc_deregister(&ds1374_miscdev);
+		return ret;
+	}
+	ds1374_wdt_settimeout(131072);
+#endif
+
 	return 0;
 }
 
 static int ds1374_remove(struct i2c_client *client)
 {
 	struct ds1374 *ds1374 = i2c_get_clientdata(client);
+#ifdef CONFIG_RTC_DRV_DS1374_WDT
+	int res;
+
+	res = misc_deregister(&ds1374_miscdev);
+	if (!res)
+		ds1374_miscdev.parent = NULL;
+	unregister_reboot_notifier(&ds1374_wdt_notifier);
+#endif
 
 	if (client->irq > 0) {
 		mutex_lock(&ds1374->mutex);

+ 60 - 23
drivers/rtc/rtc-isl12057.c

@@ -41,6 +41,7 @@
 #define ISL12057_REG_RTC_DW	0x03	/* Day of the Week */
 #define ISL12057_REG_RTC_DT	0x04	/* Date */
 #define ISL12057_REG_RTC_MO	0x05	/* Month */
+#define ISL12057_REG_RTC_MO_CEN	BIT(7)	/* Century bit */
 #define ISL12057_REG_RTC_YR	0x06	/* Year */
 #define ISL12057_RTC_SEC_LEN	7
 
@@ -88,7 +89,7 @@ static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs)
 	tm->tm_min = bcd2bin(regs[ISL12057_REG_RTC_MN]);
 
 	if (regs[ISL12057_REG_RTC_HR] & ISL12057_REG_RTC_HR_MIL) { /* AM/PM */
-		tm->tm_hour = bcd2bin(regs[ISL12057_REG_RTC_HR] & 0x0f);
+		tm->tm_hour = bcd2bin(regs[ISL12057_REG_RTC_HR] & 0x1f);
 		if (regs[ISL12057_REG_RTC_HR] & ISL12057_REG_RTC_HR_PM)
 			tm->tm_hour += 12;
 	} else {					    /* 24 hour mode */
@@ -97,26 +98,37 @@ static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs)
 
 	tm->tm_mday = bcd2bin(regs[ISL12057_REG_RTC_DT]);
 	tm->tm_wday = bcd2bin(regs[ISL12057_REG_RTC_DW]) - 1; /* starts at 1 */
-	tm->tm_mon  = bcd2bin(regs[ISL12057_REG_RTC_MO]) - 1; /* starts at 1 */
+	tm->tm_mon  = bcd2bin(regs[ISL12057_REG_RTC_MO] & 0x1f) - 1; /* ditto */
 	tm->tm_year = bcd2bin(regs[ISL12057_REG_RTC_YR]) + 100;
+
+	/* Check if years register has overflown from 99 to 00 */
+	if (regs[ISL12057_REG_RTC_MO] & ISL12057_REG_RTC_MO_CEN)
+		tm->tm_year += 100;
 }
 
 static int isl12057_rtc_tm_to_regs(u8 *regs, struct rtc_time *tm)
 {
+	u8 century_bit;
+
 	/*
 	 * The clock has an 8 bit wide bcd-coded register for the year.
+	 * It also has a century bit encoded in MO flag which provides
+	 * information about overflow of year register from 99 to 00.
 	 * tm_year is an offset from 1900 and we are interested in the
-	 * 2000-2099 range, so any value less than 100 is invalid.
+	 * 2000-2199 range, so any value less than 100 or larger than
+	 * 299 is invalid.
 	 */
-	if (tm->tm_year < 100)
+	if (tm->tm_year < 100 || tm->tm_year > 299)
 		return -EINVAL;
 
+	century_bit = (tm->tm_year > 199) ? ISL12057_REG_RTC_MO_CEN : 0;
+
 	regs[ISL12057_REG_RTC_SC] = bin2bcd(tm->tm_sec);
 	regs[ISL12057_REG_RTC_MN] = bin2bcd(tm->tm_min);
 	regs[ISL12057_REG_RTC_HR] = bin2bcd(tm->tm_hour); /* 24-hour format */
 	regs[ISL12057_REG_RTC_DT] = bin2bcd(tm->tm_mday);
-	regs[ISL12057_REG_RTC_MO] = bin2bcd(tm->tm_mon + 1);
-	regs[ISL12057_REG_RTC_YR] = bin2bcd(tm->tm_year - 100);
+	regs[ISL12057_REG_RTC_MO] = bin2bcd(tm->tm_mon + 1) | century_bit;
+	regs[ISL12057_REG_RTC_YR] = bin2bcd(tm->tm_year % 100);
 	regs[ISL12057_REG_RTC_DW] = bin2bcd(tm->tm_wday + 1);
 
 	return 0;
@@ -152,17 +164,33 @@ static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct isl12057_rtc_data *data = dev_get_drvdata(dev);
 	u8 regs[ISL12057_RTC_SEC_LEN];
+	unsigned int sr;
 	int ret;
 
 	mutex_lock(&data->lock);
+	ret = regmap_read(data->regmap, ISL12057_REG_SR, &sr);
+	if (ret) {
+		dev_err(dev, "%s: unable to read oscillator status flag (%d)\n",
+			__func__, ret);
+		goto out;
+	} else {
+		if (sr & ISL12057_REG_SR_OSF) {
+			ret = -ENODATA;
+			goto out;
+		}
+	}
+
 	ret = regmap_bulk_read(data->regmap, ISL12057_REG_RTC_SC, regs,
 			       ISL12057_RTC_SEC_LEN);
+	if (ret)
+		dev_err(dev, "%s: unable to read RTC time section (%d)\n",
+			__func__, ret);
+
+out:
 	mutex_unlock(&data->lock);
 
-	if (ret) {
-		dev_err(dev, "%s: RTC read failed\n", __func__);
+	if (ret)
 		return ret;
-	}
 
 	isl12057_rtc_regs_to_tm(tm, regs);
 
@@ -182,10 +210,24 @@ static int isl12057_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	mutex_lock(&data->lock);
 	ret = regmap_bulk_write(data->regmap, ISL12057_REG_RTC_SC, regs,
 				ISL12057_RTC_SEC_LEN);
-	mutex_unlock(&data->lock);
+	if (ret) {
+		dev_err(dev, "%s: unable to write RTC time section (%d)\n",
+			__func__, ret);
+		goto out;
+	}
 
-	if (ret)
-		dev_err(dev, "%s: RTC write failed\n", __func__);
+	/*
+	 * Now that RTC time has been updated, let's clear oscillator
+	 * failure flag, if needed.
+	 */
+	ret = regmap_update_bits(data->regmap, ISL12057_REG_SR,
+				 ISL12057_REG_SR_OSF, 0);
+	if (ret < 0)
+		dev_err(dev, "%s: unable to clear osc. failure bit (%d)\n",
+			__func__, ret);
+
+out:
+	mutex_unlock(&data->lock);
 
 	return ret;
 }
@@ -203,15 +245,8 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap)
 	ret = regmap_update_bits(regmap, ISL12057_REG_INT,
 				 ISL12057_REG_INT_EOSC, 0);
 	if (ret < 0) {
-		dev_err(dev, "Unable to enable oscillator\n");
-		return ret;
-	}
-
-	/* Clear oscillator failure bit if needed */
-	ret = regmap_update_bits(regmap, ISL12057_REG_SR,
-				 ISL12057_REG_SR_OSF, 0);
-	if (ret < 0) {
-		dev_err(dev, "Unable to clear oscillator failure bit\n");
+		dev_err(dev, "%s: unable to enable oscillator (%d)\n",
+			__func__, ret);
 		return ret;
 	}
 
@@ -219,7 +254,8 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap)
 	ret = regmap_update_bits(regmap, ISL12057_REG_SR,
 				 ISL12057_REG_SR_A1F, 0);
 	if (ret < 0) {
-		dev_err(dev, "Unable to clear alarm bit\n");
+		dev_err(dev, "%s: unable to clear alarm bit (%d)\n",
+			__func__, ret);
 		return ret;
 	}
 
@@ -253,7 +289,8 @@ static int isl12057_probe(struct i2c_client *client,
 	regmap = devm_regmap_init_i2c(client, &isl12057_rtc_regmap_config);
 	if (IS_ERR(regmap)) {
 		ret = PTR_ERR(regmap);
-		dev_err(dev, "regmap allocation failed: %d\n", ret);
+		dev_err(dev, "%s: regmap allocation failed (%d)\n",
+			__func__, ret);
 		return ret;
 	}
 

+ 349 - 198
drivers/rtc/rtc-omap.c

@@ -1,10 +1,11 @@
 /*
- * TI OMAP1 Real Time Clock interface for Linux
+ * TI OMAP Real Time Clock interface for Linux
  *
  * Copyright (C) 2003 MontaVista Software, Inc.
  * Author: George G. Davis <gdavis@mvista.com> or <source@mvista.com>
  *
  * Copyright (C) 2006 David Brownell (new RTC framework)
+ * Copyright (C) 2014 Johan Hovold <johan@kernel.org>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -25,7 +26,8 @@
 #include <linux/pm_runtime.h>
 #include <linux/io.h>
 
-/* The OMAP1 RTC is a year/month/day/hours/minutes/seconds BCD clock
+/*
+ * The OMAP RTC is a year/month/day/hours/minutes/seconds BCD clock
  * with century-range alarm matching, driven by the 32kHz clock.
  *
  * The main user-visible ways it differs from PC RTCs are by omitting
@@ -39,10 +41,6 @@
  * the SoC). See the BOARD-SPECIFIC CUSTOMIZATION comment.
  */
 
-#define	DRIVER_NAME			"omap_rtc"
-
-#define OMAP_RTC_BASE			0xfffb4800
-
 /* RTC registers */
 #define OMAP_RTC_SECONDS_REG		0x00
 #define OMAP_RTC_MINUTES_REG		0x04
@@ -72,6 +70,15 @@
 
 #define OMAP_RTC_IRQWAKEEN		0x7c
 
+#define OMAP_RTC_ALARM2_SECONDS_REG	0x80
+#define OMAP_RTC_ALARM2_MINUTES_REG	0x84
+#define OMAP_RTC_ALARM2_HOURS_REG	0x88
+#define OMAP_RTC_ALARM2_DAYS_REG	0x8c
+#define OMAP_RTC_ALARM2_MONTHS_REG	0x90
+#define OMAP_RTC_ALARM2_YEARS_REG	0x94
+
+#define OMAP_RTC_PMIC_REG		0x98
+
 /* OMAP_RTC_CTRL_REG bit fields: */
 #define OMAP_RTC_CTRL_SPLIT		BIT(7)
 #define OMAP_RTC_CTRL_DISABLE		BIT(6)
@@ -84,6 +91,7 @@
 
 /* OMAP_RTC_STATUS_REG bit fields: */
 #define OMAP_RTC_STATUS_POWER_UP	BIT(7)
+#define OMAP_RTC_STATUS_ALARM2		BIT(7)
 #define OMAP_RTC_STATUS_ALARM		BIT(6)
 #define OMAP_RTC_STATUS_1D_EVENT	BIT(5)
 #define OMAP_RTC_STATUS_1H_EVENT	BIT(4)
@@ -93,6 +101,7 @@
 #define OMAP_RTC_STATUS_BUSY		BIT(0)
 
 /* OMAP_RTC_INTERRUPTS_REG bit fields: */
+#define OMAP_RTC_INTERRUPTS_IT_ALARM2	BIT(4)
 #define OMAP_RTC_INTERRUPTS_IT_ALARM	BIT(3)
 #define OMAP_RTC_INTERRUPTS_IT_TIMER	BIT(2)
 
@@ -102,61 +111,82 @@
 /* OMAP_RTC_IRQWAKEEN bit fields: */
 #define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN	BIT(1)
 
+/* OMAP_RTC_PMIC bit fields: */
+#define OMAP_RTC_PMIC_POWER_EN_EN	BIT(16)
+
 /* OMAP_RTC_KICKER values */
 #define	KICK0_VALUE			0x83e70b13
 #define	KICK1_VALUE			0x95a4f1e0
 
-#define	OMAP_RTC_HAS_KICKER		BIT(0)
-
-/*
- * Few RTC IP revisions has special WAKE-EN Register to enable Wakeup
- * generation for event Alarm.
- */
-#define	OMAP_RTC_HAS_IRQWAKEEN		BIT(1)
+struct omap_rtc_device_type {
+	bool has_32kclk_en;
+	bool has_kicker;
+	bool has_irqwakeen;
+	bool has_pmic_mode;
+	bool has_power_up_reset;
+};
 
-/*
- * Some RTC IP revisions (like those in AM335x and DRA7x) need
- * the 32KHz clock to be explicitly enabled.
- */
-#define OMAP_RTC_HAS_32KCLK_EN		BIT(2)
+struct omap_rtc {
+	struct rtc_device *rtc;
+	void __iomem *base;
+	int irq_alarm;
+	int irq_timer;
+	u8 interrupts_reg;
+	bool is_pmic_controller;
+	const struct omap_rtc_device_type *type;
+};
 
-static void __iomem	*rtc_base;
+static inline u8 rtc_read(struct omap_rtc *rtc, unsigned int reg)
+{
+	return readb(rtc->base + reg);
+}
 
-#define rtc_read(addr)		readb(rtc_base + (addr))
-#define rtc_write(val, addr)	writeb(val, rtc_base + (addr))
+static inline u32 rtc_readl(struct omap_rtc *rtc, unsigned int reg)
+{
+	return readl(rtc->base + reg);
+}
 
-#define rtc_writel(val, addr)	writel(val, rtc_base + (addr))
+static inline void rtc_write(struct omap_rtc *rtc, unsigned int reg, u8 val)
+{
+	writeb(val, rtc->base + reg);
+}
 
+static inline void rtc_writel(struct omap_rtc *rtc, unsigned int reg, u32 val)
+{
+	writel(val, rtc->base + reg);
+}
 
-/* we rely on the rtc framework to handle locking (rtc->ops_lock),
+/*
+ * We rely on the rtc framework to handle locking (rtc->ops_lock),
  * so the only other requirement is that register accesses which
  * require BUSY to be clear are made with IRQs locally disabled
  */
-static void rtc_wait_not_busy(void)
+static void rtc_wait_not_busy(struct omap_rtc *rtc)
 {
-	int	count = 0;
-	u8	status;
+	int count;
+	u8 status;
 
 	/* BUSY may stay active for 1/32768 second (~30 usec) */
 	for (count = 0; count < 50; count++) {
-		status = rtc_read(OMAP_RTC_STATUS_REG);
-		if ((status & (u8)OMAP_RTC_STATUS_BUSY) == 0)
+		status = rtc_read(rtc, OMAP_RTC_STATUS_REG);
+		if (!(status & OMAP_RTC_STATUS_BUSY))
 			break;
 		udelay(1);
 	}
 	/* now we have ~15 usec to read/write various registers */
 }
 
-static irqreturn_t rtc_irq(int irq, void *rtc)
+static irqreturn_t rtc_irq(int irq, void *dev_id)
 {
-	unsigned long		events = 0;
-	u8			irq_data;
+	struct omap_rtc	*rtc = dev_id;
+	unsigned long events = 0;
+	u8 irq_data;
 
-	irq_data = rtc_read(OMAP_RTC_STATUS_REG);
+	irq_data = rtc_read(rtc, OMAP_RTC_STATUS_REG);
 
 	/* alarm irq? */
 	if (irq_data & OMAP_RTC_STATUS_ALARM) {
-		rtc_write(OMAP_RTC_STATUS_ALARM, OMAP_RTC_STATUS_REG);
+		rtc_write(rtc, OMAP_RTC_STATUS_REG, OMAP_RTC_STATUS_ALARM);
 		events |= RTC_IRQF | RTC_AF;
 	}
 
@@ -164,23 +194,21 @@ static irqreturn_t rtc_irq(int irq, void *rtc)
 	if (irq_data & OMAP_RTC_STATUS_1S_EVENT)
 		events |= RTC_IRQF | RTC_UF;
 
-	rtc_update_irq(rtc, 1, events);
+	rtc_update_irq(rtc->rtc, 1, events);
 
 	return IRQ_HANDLED;
 }
 
 static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
 	u8 reg, irqwake_reg = 0;
-	struct platform_device *pdev = to_platform_device(dev);
-	const struct platform_device_id *id_entry =
-					platform_get_device_id(pdev);
 
 	local_irq_disable();
-	rtc_wait_not_busy();
-	reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
-	if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN)
-		irqwake_reg = rtc_read(OMAP_RTC_IRQWAKEEN);
+	rtc_wait_not_busy(rtc);
+	reg = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+	if (rtc->type->has_irqwakeen)
+		irqwake_reg = rtc_read(rtc, OMAP_RTC_IRQWAKEEN);
 
 	if (enabled) {
 		reg |= OMAP_RTC_INTERRUPTS_IT_ALARM;
@@ -189,10 +217,10 @@ static int omap_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
 		reg &= ~OMAP_RTC_INTERRUPTS_IT_ALARM;
 		irqwake_reg &= ~OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN;
 	}
-	rtc_wait_not_busy();
-	rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
-	if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN)
-		rtc_write(irqwake_reg, OMAP_RTC_IRQWAKEEN);
+	rtc_wait_not_busy(rtc);
+	rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, reg);
+	if (rtc->type->has_irqwakeen)
+		rtc_write(rtc, OMAP_RTC_IRQWAKEEN, irqwake_reg);
 	local_irq_enable();
 
 	return 0;
@@ -230,39 +258,47 @@ static void bcd2tm(struct rtc_time *tm)
 	tm->tm_year = bcd2bin(tm->tm_year) + 100;
 }
 
+static void omap_rtc_read_time_raw(struct omap_rtc *rtc, struct rtc_time *tm)
+{
+	tm->tm_sec = rtc_read(rtc, OMAP_RTC_SECONDS_REG);
+	tm->tm_min = rtc_read(rtc, OMAP_RTC_MINUTES_REG);
+	tm->tm_hour = rtc_read(rtc, OMAP_RTC_HOURS_REG);
+	tm->tm_mday = rtc_read(rtc, OMAP_RTC_DAYS_REG);
+	tm->tm_mon = rtc_read(rtc, OMAP_RTC_MONTHS_REG);
+	tm->tm_year = rtc_read(rtc, OMAP_RTC_YEARS_REG);
+}
 
 static int omap_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
+
 	/* we don't report wday/yday/isdst ... */
 	local_irq_disable();
-	rtc_wait_not_busy();
-
-	tm->tm_sec = rtc_read(OMAP_RTC_SECONDS_REG);
-	tm->tm_min = rtc_read(OMAP_RTC_MINUTES_REG);
-	tm->tm_hour = rtc_read(OMAP_RTC_HOURS_REG);
-	tm->tm_mday = rtc_read(OMAP_RTC_DAYS_REG);
-	tm->tm_mon = rtc_read(OMAP_RTC_MONTHS_REG);
-	tm->tm_year = rtc_read(OMAP_RTC_YEARS_REG);
-
+	rtc_wait_not_busy(rtc);
+	omap_rtc_read_time_raw(rtc, tm);
 	local_irq_enable();
 
 	bcd2tm(tm);
+
 	return 0;
 }
 
 static int omap_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
+
 	if (tm2bcd(tm) < 0)
 		return -EINVAL;
+
 	local_irq_disable();
-	rtc_wait_not_busy();
+	rtc_wait_not_busy(rtc);
 
-	rtc_write(tm->tm_year, OMAP_RTC_YEARS_REG);
-	rtc_write(tm->tm_mon, OMAP_RTC_MONTHS_REG);
-	rtc_write(tm->tm_mday, OMAP_RTC_DAYS_REG);
-	rtc_write(tm->tm_hour, OMAP_RTC_HOURS_REG);
-	rtc_write(tm->tm_min, OMAP_RTC_MINUTES_REG);
-	rtc_write(tm->tm_sec, OMAP_RTC_SECONDS_REG);
+	rtc_write(rtc, OMAP_RTC_YEARS_REG, tm->tm_year);
+	rtc_write(rtc, OMAP_RTC_MONTHS_REG, tm->tm_mon);
+	rtc_write(rtc, OMAP_RTC_DAYS_REG, tm->tm_mday);
+	rtc_write(rtc, OMAP_RTC_HOURS_REG, tm->tm_hour);
+	rtc_write(rtc, OMAP_RTC_MINUTES_REG, tm->tm_min);
+	rtc_write(rtc, OMAP_RTC_SECONDS_REG, tm->tm_sec);
 
 	local_irq_enable();
 
@@ -271,48 +307,50 @@ static int omap_rtc_set_time(struct device *dev, struct rtc_time *tm)
 
 static int omap_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
+	u8 interrupts;
+
 	local_irq_disable();
-	rtc_wait_not_busy();
+	rtc_wait_not_busy(rtc);
 
-	alm->time.tm_sec = rtc_read(OMAP_RTC_ALARM_SECONDS_REG);
-	alm->time.tm_min = rtc_read(OMAP_RTC_ALARM_MINUTES_REG);
-	alm->time.tm_hour = rtc_read(OMAP_RTC_ALARM_HOURS_REG);
-	alm->time.tm_mday = rtc_read(OMAP_RTC_ALARM_DAYS_REG);
-	alm->time.tm_mon = rtc_read(OMAP_RTC_ALARM_MONTHS_REG);
-	alm->time.tm_year = rtc_read(OMAP_RTC_ALARM_YEARS_REG);
+	alm->time.tm_sec = rtc_read(rtc, OMAP_RTC_ALARM_SECONDS_REG);
+	alm->time.tm_min = rtc_read(rtc, OMAP_RTC_ALARM_MINUTES_REG);
+	alm->time.tm_hour = rtc_read(rtc, OMAP_RTC_ALARM_HOURS_REG);
+	alm->time.tm_mday = rtc_read(rtc, OMAP_RTC_ALARM_DAYS_REG);
+	alm->time.tm_mon = rtc_read(rtc, OMAP_RTC_ALARM_MONTHS_REG);
+	alm->time.tm_year = rtc_read(rtc, OMAP_RTC_ALARM_YEARS_REG);
 
 	local_irq_enable();
 
 	bcd2tm(&alm->time);
-	alm->enabled = !!(rtc_read(OMAP_RTC_INTERRUPTS_REG)
-			& OMAP_RTC_INTERRUPTS_IT_ALARM);
+
+	interrupts = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+	alm->enabled = !!(interrupts & OMAP_RTC_INTERRUPTS_IT_ALARM);
 
 	return 0;
 }
 
 static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
 	u8 reg, irqwake_reg = 0;
-	struct platform_device *pdev = to_platform_device(dev);
-	const struct platform_device_id *id_entry =
-					platform_get_device_id(pdev);
 
 	if (tm2bcd(&alm->time) < 0)
 		return -EINVAL;
 
 	local_irq_disable();
-	rtc_wait_not_busy();
+	rtc_wait_not_busy(rtc);
 
-	rtc_write(alm->time.tm_year, OMAP_RTC_ALARM_YEARS_REG);
-	rtc_write(alm->time.tm_mon, OMAP_RTC_ALARM_MONTHS_REG);
-	rtc_write(alm->time.tm_mday, OMAP_RTC_ALARM_DAYS_REG);
-	rtc_write(alm->time.tm_hour, OMAP_RTC_ALARM_HOURS_REG);
-	rtc_write(alm->time.tm_min, OMAP_RTC_ALARM_MINUTES_REG);
-	rtc_write(alm->time.tm_sec, OMAP_RTC_ALARM_SECONDS_REG);
+	rtc_write(rtc, OMAP_RTC_ALARM_YEARS_REG, alm->time.tm_year);
+	rtc_write(rtc, OMAP_RTC_ALARM_MONTHS_REG, alm->time.tm_mon);
+	rtc_write(rtc, OMAP_RTC_ALARM_DAYS_REG, alm->time.tm_mday);
+	rtc_write(rtc, OMAP_RTC_ALARM_HOURS_REG, alm->time.tm_hour);
+	rtc_write(rtc, OMAP_RTC_ALARM_MINUTES_REG, alm->time.tm_min);
+	rtc_write(rtc, OMAP_RTC_ALARM_SECONDS_REG, alm->time.tm_sec);
 
-	reg = rtc_read(OMAP_RTC_INTERRUPTS_REG);
-	if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN)
-		irqwake_reg = rtc_read(OMAP_RTC_IRQWAKEEN);
+	reg = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+	if (rtc->type->has_irqwakeen)
+		irqwake_reg = rtc_read(rtc, OMAP_RTC_IRQWAKEEN);
 
 	if (alm->enabled) {
 		reg |= OMAP_RTC_INTERRUPTS_IT_ALARM;
@@ -321,15 +359,79 @@ static int omap_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
 		reg &= ~OMAP_RTC_INTERRUPTS_IT_ALARM;
 		irqwake_reg &= ~OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN;
 	}
-	rtc_write(reg, OMAP_RTC_INTERRUPTS_REG);
-	if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN)
-		rtc_write(irqwake_reg, OMAP_RTC_IRQWAKEEN);
+	rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, reg);
+	if (rtc->type->has_irqwakeen)
+		rtc_write(rtc, OMAP_RTC_IRQWAKEEN, irqwake_reg);
 
 	local_irq_enable();
 
 	return 0;
 }
 
+static struct omap_rtc *omap_rtc_power_off_rtc;
+
+/*
+ * omap_rtc_poweroff: RTC-controlled power off
+ *
+ * The RTC can be used to control an external PMIC via the pmic_power_en pin,
+ * which can be configured to transition to OFF on ALARM2 events.
+ *
+ * Notes:
+ * The two-second alarm offset is the shortest offset possible as the alarm
+ * registers must be set before the next timer update and the offset
+ * calculation is too heavy for everything to be done within a single access
+ * period (~15 us).
+ *
+ * Called with local interrupts disabled.
+ */
+static void omap_rtc_power_off(void)
+{
+	struct omap_rtc *rtc = omap_rtc_power_off_rtc;
+	struct rtc_time tm;
+	unsigned long now;
+	u32 val;
+
+	/* enable pmic_power_en control */
+	val = rtc_readl(rtc, OMAP_RTC_PMIC_REG);
+	rtc_writel(rtc, OMAP_RTC_PMIC_REG, val | OMAP_RTC_PMIC_POWER_EN_EN);
+
+	/* set alarm two seconds from now */
+	omap_rtc_read_time_raw(rtc, &tm);
+	bcd2tm(&tm);
+	rtc_tm_to_time(&tm, &now);
+	rtc_time_to_tm(now + 2, &tm);
+
+	if (tm2bcd(&tm) < 0) {
+		dev_err(&rtc->rtc->dev, "power off failed\n");
+		return;
+	}
+
+	rtc_wait_not_busy(rtc);
+
+	rtc_write(rtc, OMAP_RTC_ALARM2_SECONDS_REG, tm.tm_sec);
+	rtc_write(rtc, OMAP_RTC_ALARM2_MINUTES_REG, tm.tm_min);
+	rtc_write(rtc, OMAP_RTC_ALARM2_HOURS_REG, tm.tm_hour);
+	rtc_write(rtc, OMAP_RTC_ALARM2_DAYS_REG, tm.tm_mday);
+	rtc_write(rtc, OMAP_RTC_ALARM2_MONTHS_REG, tm.tm_mon);
+	rtc_write(rtc, OMAP_RTC_ALARM2_YEARS_REG, tm.tm_year);
+
+	/*
+	 * enable ALARM2 interrupt
+	 *
+	 * NOTE: this fails on AM3352 if rtc_write (writeb) is used
+	 */
+	val = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+	rtc_writel(rtc, OMAP_RTC_INTERRUPTS_REG,
+			val | OMAP_RTC_INTERRUPTS_IT_ALARM2);
+
+	/*
+	 * Wait for alarm to trigger (within two seconds) and external PMIC to
+	 * power off the system. Add a 500 ms margin for external latencies
+	 * (e.g. debounce circuits).
+	 */
+	mdelay(2500);
+}
+
 static struct rtc_class_ops omap_rtc_ops = {
 	.read_time	= omap_rtc_read_time,
 	.set_time	= omap_rtc_set_time,
@@ -338,137 +440,140 @@ static struct rtc_class_ops omap_rtc_ops = {
 	.alarm_irq_enable = omap_rtc_alarm_irq_enable,
 };
 
-static int omap_rtc_alarm;
-static int omap_rtc_timer;
+static const struct omap_rtc_device_type omap_rtc_default_type = {
+	.has_power_up_reset = true,
+};
 
-#define	OMAP_RTC_DATA_AM3352_IDX	1
-#define	OMAP_RTC_DATA_DA830_IDX		2
+static const struct omap_rtc_device_type omap_rtc_am3352_type = {
+	.has_32kclk_en	= true,
+	.has_kicker	= true,
+	.has_irqwakeen	= true,
+	.has_pmic_mode	= true,
+};
 
-static struct platform_device_id omap_rtc_devtype[] = {
+static const struct omap_rtc_device_type omap_rtc_da830_type = {
+	.has_kicker	= true,
+};
+
+static const struct platform_device_id omap_rtc_id_table[] = {
 	{
-		.name	= DRIVER_NAME,
-	},
-	[OMAP_RTC_DATA_AM3352_IDX] = {
+		.name	= "omap_rtc",
+		.driver_data = (kernel_ulong_t)&omap_rtc_default_type,
+	}, {
 		.name	= "am3352-rtc",
-		.driver_data = OMAP_RTC_HAS_KICKER | OMAP_RTC_HAS_IRQWAKEEN |
-			       OMAP_RTC_HAS_32KCLK_EN,
-	},
-	[OMAP_RTC_DATA_DA830_IDX] = {
+		.driver_data = (kernel_ulong_t)&omap_rtc_am3352_type,
+	}, {
 		.name	= "da830-rtc",
-		.driver_data = OMAP_RTC_HAS_KICKER,
-	},
-	{},
+		.driver_data = (kernel_ulong_t)&omap_rtc_da830_type,
+	}, {
+		/* sentinel */
+	}
 };
-MODULE_DEVICE_TABLE(platform, omap_rtc_devtype);
+MODULE_DEVICE_TABLE(platform, omap_rtc_id_table);
 
 static const struct of_device_id omap_rtc_of_match[] = {
-	{	.compatible	= "ti,da830-rtc",
-		.data		= &omap_rtc_devtype[OMAP_RTC_DATA_DA830_IDX],
-	},
-	{	.compatible	= "ti,am3352-rtc",
-		.data		= &omap_rtc_devtype[OMAP_RTC_DATA_AM3352_IDX],
-	},
-	{},
+	{
+		.compatible	= "ti,am3352-rtc",
+		.data		= &omap_rtc_am3352_type,
+	}, {
+		.compatible	= "ti,da830-rtc",
+		.data		= &omap_rtc_da830_type,
+	}, {
+		/* sentinel */
+	}
 };
 MODULE_DEVICE_TABLE(of, omap_rtc_of_match);
 
 static int __init omap_rtc_probe(struct platform_device *pdev)
 {
-	struct resource		*res;
-	struct rtc_device	*rtc;
-	u8			reg, new_ctrl;
+	struct omap_rtc	*rtc;
+	struct resource	*res;
+	u8 reg, mask, new_ctrl;
 	const struct platform_device_id *id_entry;
 	const struct of_device_id *of_id;
+	int ret;
 
-	of_id = of_match_device(omap_rtc_of_match, &pdev->dev);
-	if (of_id)
-		pdev->id_entry = of_id->data;
+	rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
+	if (!rtc)
+		return -ENOMEM;
 
-	id_entry = platform_get_device_id(pdev);
-	if (!id_entry) {
-		dev_err(&pdev->dev, "no matching device entry\n");
-		return -ENODEV;
+	of_id = of_match_device(omap_rtc_of_match, &pdev->dev);
+	if (of_id) {
+		rtc->type = of_id->data;
+		rtc->is_pmic_controller = rtc->type->has_pmic_mode &&
+				of_property_read_bool(pdev->dev.of_node,
+						"system-power-controller");
+	} else {
+		id_entry = platform_get_device_id(pdev);
+		rtc->type = (void *)id_entry->driver_data;
 	}
 
-	omap_rtc_timer = platform_get_irq(pdev, 0);
-	if (omap_rtc_timer <= 0) {
-		pr_debug("%s: no update irq?\n", pdev->name);
+	rtc->irq_timer = platform_get_irq(pdev, 0);
+	if (rtc->irq_timer <= 0)
 		return -ENOENT;
-	}
 
-	omap_rtc_alarm = platform_get_irq(pdev, 1);
-	if (omap_rtc_alarm <= 0) {
-		pr_debug("%s: no alarm irq?\n", pdev->name);
+	rtc->irq_alarm = platform_get_irq(pdev, 1);
+	if (rtc->irq_alarm <= 0)
 		return -ENOENT;
-	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rtc_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(rtc_base))
-		return PTR_ERR(rtc_base);
+	rtc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(rtc->base))
+		return PTR_ERR(rtc->base);
+
+	platform_set_drvdata(pdev, rtc);
 
 	/* Enable the clock/module so that we can access the registers */
 	pm_runtime_enable(&pdev->dev);
 	pm_runtime_get_sync(&pdev->dev);
 
-	if (id_entry->driver_data & OMAP_RTC_HAS_KICKER) {
-		rtc_writel(KICK0_VALUE, OMAP_RTC_KICK0_REG);
-		rtc_writel(KICK1_VALUE, OMAP_RTC_KICK1_REG);
-	}
-
-	rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-			&omap_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtc)) {
-		pr_debug("%s: can't register RTC device, err %ld\n",
-			pdev->name, PTR_ERR(rtc));
-		goto fail0;
+	if (rtc->type->has_kicker) {
+		rtc_writel(rtc, OMAP_RTC_KICK0_REG, KICK0_VALUE);
+		rtc_writel(rtc, OMAP_RTC_KICK1_REG, KICK1_VALUE);
 	}
-	platform_set_drvdata(pdev, rtc);
 
-	/* clear pending irqs, and set 1/second periodic,
-	 * which we'll use instead of update irqs
+	/*
+	 * disable interrupts
+	 *
+	 * NOTE: ALARM2 is not cleared on AM3352 if rtc_write (writeb) is used
 	 */
-	rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+	rtc_writel(rtc, OMAP_RTC_INTERRUPTS_REG, 0);
 
 	/* enable RTC functional clock */
-	if (id_entry->driver_data & OMAP_RTC_HAS_32KCLK_EN)
-		rtc_writel(OMAP_RTC_OSC_32KCLK_EN, OMAP_RTC_OSC_REG);
+	if (rtc->type->has_32kclk_en) {
+		reg = rtc_read(rtc, OMAP_RTC_OSC_REG);
+		rtc_writel(rtc, OMAP_RTC_OSC_REG,
+				reg | OMAP_RTC_OSC_32KCLK_EN);
+	}
 
 	/* clear old status */
-	reg = rtc_read(OMAP_RTC_STATUS_REG);
-	if (reg & (u8) OMAP_RTC_STATUS_POWER_UP) {
-		pr_info("%s: RTC power up reset detected\n",
-			pdev->name);
-		rtc_write(OMAP_RTC_STATUS_POWER_UP, OMAP_RTC_STATUS_REG);
-	}
-	if (reg & (u8) OMAP_RTC_STATUS_ALARM)
-		rtc_write(OMAP_RTC_STATUS_ALARM, OMAP_RTC_STATUS_REG);
+	reg = rtc_read(rtc, OMAP_RTC_STATUS_REG);
 
-	/* handle periodic and alarm irqs */
-	if (devm_request_irq(&pdev->dev, omap_rtc_timer, rtc_irq, 0,
-			dev_name(&rtc->dev), rtc)) {
-		pr_debug("%s: RTC timer interrupt IRQ%d already claimed\n",
-			pdev->name, omap_rtc_timer);
-		goto fail0;
-	}
-	if ((omap_rtc_timer != omap_rtc_alarm) &&
-		(devm_request_irq(&pdev->dev, omap_rtc_alarm, rtc_irq, 0,
-			dev_name(&rtc->dev), rtc))) {
-		pr_debug("%s: RTC alarm interrupt IRQ%d already claimed\n",
-			pdev->name, omap_rtc_alarm);
-		goto fail0;
+	mask = OMAP_RTC_STATUS_ALARM;
+
+	if (rtc->type->has_pmic_mode)
+		mask |= OMAP_RTC_STATUS_ALARM2;
+
+	if (rtc->type->has_power_up_reset) {
+		mask |= OMAP_RTC_STATUS_POWER_UP;
+		if (reg & OMAP_RTC_STATUS_POWER_UP)
+			dev_info(&pdev->dev, "RTC power up reset detected\n");
 	}
 
+	if (reg & mask)
+		rtc_write(rtc, OMAP_RTC_STATUS_REG, reg & mask);
+
 	/* On boards with split power, RTC_ON_NOFF won't reset the RTC */
-	reg = rtc_read(OMAP_RTC_CTRL_REG);
-	if (reg & (u8) OMAP_RTC_CTRL_STOP)
-		pr_info("%s: already running\n", pdev->name);
+	reg = rtc_read(rtc, OMAP_RTC_CTRL_REG);
+	if (reg & OMAP_RTC_CTRL_STOP)
+		dev_info(&pdev->dev, "already running\n");
 
 	/* force to 24 hour mode */
-	new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT|OMAP_RTC_CTRL_AUTO_COMP);
+	new_ctrl = reg & (OMAP_RTC_CTRL_SPLIT | OMAP_RTC_CTRL_AUTO_COMP);
 	new_ctrl |= OMAP_RTC_CTRL_STOP;
 
-	/* BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
+	/*
+	 * BOARD-SPECIFIC CUSTOMIZATION CAN GO HERE:
 	 *
 	 *  - Device wake-up capability setting should come through chip
 	 *    init logic. OMAP1 boards should initialize the "wakeup capable"
@@ -482,36 +587,70 @@ static int __init omap_rtc_probe(struct platform_device *pdev)
 	 *    is write-only, and always reads as zero...)
 	 */
 
+	if (new_ctrl & OMAP_RTC_CTRL_SPLIT)
+		dev_info(&pdev->dev, "split power mode\n");
+
+	if (reg != new_ctrl)
+		rtc_write(rtc, OMAP_RTC_CTRL_REG, new_ctrl);
+
 	device_init_wakeup(&pdev->dev, true);
 
-	if (new_ctrl & (u8) OMAP_RTC_CTRL_SPLIT)
-		pr_info("%s: split power mode\n", pdev->name);
+	rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+			&omap_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtc->rtc)) {
+		ret = PTR_ERR(rtc->rtc);
+		goto err;
+	}
 
-	if (reg != new_ctrl)
-		rtc_write(new_ctrl, OMAP_RTC_CTRL_REG);
+	/* handle periodic and alarm irqs */
+	ret = devm_request_irq(&pdev->dev, rtc->irq_timer, rtc_irq, 0,
+			dev_name(&rtc->rtc->dev), rtc);
+	if (ret)
+		goto err;
+
+	if (rtc->irq_timer != rtc->irq_alarm) {
+		ret = devm_request_irq(&pdev->dev, rtc->irq_alarm, rtc_irq, 0,
+				dev_name(&rtc->rtc->dev), rtc);
+		if (ret)
+			goto err;
+	}
+
+	if (rtc->is_pmic_controller) {
+		if (!pm_power_off) {
+			omap_rtc_power_off_rtc = rtc;
+			pm_power_off = omap_rtc_power_off;
+		}
+	}
 
 	return 0;
 
-fail0:
-	if (id_entry->driver_data & OMAP_RTC_HAS_KICKER)
-		rtc_writel(0, OMAP_RTC_KICK0_REG);
+err:
+	device_init_wakeup(&pdev->dev, false);
+	if (rtc->type->has_kicker)
+		rtc_writel(rtc, OMAP_RTC_KICK0_REG, 0);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	return -EIO;
+
+	return ret;
 }
 
 static int __exit omap_rtc_remove(struct platform_device *pdev)
 {
-	const struct platform_device_id *id_entry =
-				platform_get_device_id(pdev);
+	struct omap_rtc *rtc = platform_get_drvdata(pdev);
+
+	if (pm_power_off == omap_rtc_power_off &&
+			omap_rtc_power_off_rtc == rtc) {
+		pm_power_off = NULL;
+		omap_rtc_power_off_rtc = NULL;
+	}
 
 	device_init_wakeup(&pdev->dev, 0);
 
 	/* leave rtc running, but disable irqs */
-	rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+	rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0);
 
-	if (id_entry->driver_data & OMAP_RTC_HAS_KICKER)
-		rtc_writel(0, OMAP_RTC_KICK0_REG);
+	if (rtc->type->has_kicker)
+		rtc_writel(rtc, OMAP_RTC_KICK0_REG, 0);
 
 	/* Disable the clock/module */
 	pm_runtime_put_sync(&pdev->dev);
@@ -521,20 +660,21 @@ static int __exit omap_rtc_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static u8 irqstat;
-
 static int omap_rtc_suspend(struct device *dev)
 {
-	irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG);
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
 
-	/* FIXME the RTC alarm is not currently acting as a wakeup event
+	rtc->interrupts_reg = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+
+	/*
+	 * FIXME: the RTC alarm is not currently acting as a wakeup event
 	 * source on some platforms, and in fact this enable() call is just
 	 * saving a flag that's never used...
 	 */
 	if (device_may_wakeup(dev))
-		enable_irq_wake(omap_rtc_alarm);
+		enable_irq_wake(rtc->irq_alarm);
 	else
-		rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+		rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, 0);
 
 	/* Disable the clock/module */
 	pm_runtime_put_sync(dev);
@@ -544,13 +684,15 @@ static int omap_rtc_suspend(struct device *dev)
 
 static int omap_rtc_resume(struct device *dev)
 {
+	struct omap_rtc *rtc = dev_get_drvdata(dev);
+
 	/* Enable the clock/module so that we can access the registers */
 	pm_runtime_get_sync(dev);
 
 	if (device_may_wakeup(dev))
-		disable_irq_wake(omap_rtc_alarm);
+		disable_irq_wake(rtc->irq_alarm);
 	else
-		rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG);
+		rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, rtc->interrupts_reg);
 
 	return 0;
 }
@@ -560,23 +702,32 @@ static SIMPLE_DEV_PM_OPS(omap_rtc_pm_ops, omap_rtc_suspend, omap_rtc_resume);
 
 static void omap_rtc_shutdown(struct platform_device *pdev)
 {
-	rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+	struct omap_rtc *rtc = platform_get_drvdata(pdev);
+	u8 mask;
+
+	/*
+	 * Keep the ALARM interrupt enabled to allow the system to power up on
+	 * alarm events.
+	 */
+	mask = rtc_read(rtc, OMAP_RTC_INTERRUPTS_REG);
+	mask &= OMAP_RTC_INTERRUPTS_IT_ALARM;
+	rtc_write(rtc, OMAP_RTC_INTERRUPTS_REG, mask);
 }
 
-MODULE_ALIAS("platform:omap_rtc");
 static struct platform_driver omap_rtc_driver = {
 	.remove		= __exit_p(omap_rtc_remove),
 	.shutdown	= omap_rtc_shutdown,
 	.driver		= {
-		.name	= DRIVER_NAME,
+		.name	= "omap_rtc",
 		.owner	= THIS_MODULE,
 		.pm	= &omap_rtc_pm_ops,
 		.of_match_table = omap_rtc_of_match,
 	},
-	.id_table	= omap_rtc_devtype,
+	.id_table	= omap_rtc_id_table,
 };
 
 module_platform_driver_probe(omap_rtc_driver, omap_rtc_probe);
 
+MODULE_ALIAS("platform:omap_rtc");
 MODULE_AUTHOR("George G. Davis (and others)");
 MODULE_LICENSE("GPL");

+ 46 - 9
drivers/rtc/rtc-pcf8563.c

@@ -28,6 +28,7 @@
 #define PCF8563_REG_ST2		0x01
 #define PCF8563_BIT_AIE		(1 << 1)
 #define PCF8563_BIT_AF		(1 << 3)
+#define PCF8563_BITS_ST2_N	(7 << 5)
 
 #define PCF8563_REG_SC		0x02 /* datetime */
 #define PCF8563_REG_MN		0x03
@@ -41,6 +42,13 @@
 
 #define PCF8563_REG_CLKO	0x0D /* clock out */
 #define PCF8563_REG_TMRC	0x0E /* timer control */
+#define PCF8563_TMRC_ENABLE	BIT(7)
+#define PCF8563_TMRC_4096	0
+#define PCF8563_TMRC_64		1
+#define PCF8563_TMRC_1		2
+#define PCF8563_TMRC_1_60	3
+#define PCF8563_TMRC_MASK	3
+
 #define PCF8563_REG_TMR		0x0F /* timer */
 
 #define PCF8563_SC_LV		0x80 /* low voltage */
@@ -118,22 +126,21 @@ static int pcf8563_write_block_data(struct i2c_client *client,
 
 static int pcf8563_set_alarm_mode(struct i2c_client *client, bool on)
 {
-	unsigned char buf[2];
+	unsigned char buf;
 	int err;
 
-	err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+	err = pcf8563_read_block_data(client, PCF8563_REG_ST2, 1, &buf);
 	if (err < 0)
 		return err;
 
 	if (on)
-		buf[1] |= PCF8563_BIT_AIE;
+		buf |= PCF8563_BIT_AIE;
 	else
-		buf[1] &= ~PCF8563_BIT_AIE;
+		buf &= ~PCF8563_BIT_AIE;
 
-	buf[1] &= ~PCF8563_BIT_AF;
-	buf[0] = PCF8563_REG_ST2;
+	buf &= ~(PCF8563_BIT_AF | PCF8563_BITS_ST2_N);
 
-	err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, buf + 1);
+	err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, &buf);
 	if (err < 0) {
 		dev_err(&client->dev, "%s: write error\n", __func__);
 		return -EIO;
@@ -336,8 +343,8 @@ static int pcf8563_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *tm)
 		__func__, buf[0], buf[1], buf[2], buf[3]);
 
 	tm->time.tm_min = bcd2bin(buf[0] & 0x7F);
-	tm->time.tm_hour = bcd2bin(buf[1] & 0x7F);
-	tm->time.tm_mday = bcd2bin(buf[2] & 0x1F);
+	tm->time.tm_hour = bcd2bin(buf[1] & 0x3F);
+	tm->time.tm_mday = bcd2bin(buf[2] & 0x3F);
 	tm->time.tm_wday = bcd2bin(buf[3] & 0x7);
 	tm->time.tm_mon = -1;
 	tm->time.tm_year = -1;
@@ -361,6 +368,14 @@ static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
 	struct i2c_client *client = to_i2c_client(dev);
 	unsigned char buf[4];
 	int err;
+	unsigned long alarm_time;
+
+	/* The alarm has no seconds, round up to nearest minute */
+	if (tm->time.tm_sec) {
+		rtc_tm_to_time(&tm->time, &alarm_time);
+		alarm_time += 60-tm->time.tm_sec;
+		rtc_time_to_tm(alarm_time, &tm->time);
+	}
 
 	dev_dbg(dev, "%s, min=%d hour=%d wday=%d mday=%d "
 		"enabled=%d pending=%d\n", __func__,
@@ -381,6 +396,7 @@ static int pcf8563_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *tm)
 
 static int pcf8563_irq_enable(struct device *dev, unsigned int enabled)
 {
+	dev_dbg(dev, "%s: en=%d\n", __func__, enabled);
 	return pcf8563_set_alarm_mode(to_i2c_client(dev), !!enabled);
 }
 
@@ -398,6 +414,8 @@ static int pcf8563_probe(struct i2c_client *client,
 {
 	struct pcf8563 *pcf8563;
 	int err;
+	unsigned char buf;
+	unsigned char alm_pending;
 
 	dev_dbg(&client->dev, "%s\n", __func__);
 
@@ -415,6 +433,22 @@ static int pcf8563_probe(struct i2c_client *client,
 	pcf8563->client = client;
 	device_set_wakeup_capable(&client->dev, 1);
 
+	/* Set timer to lowest frequency to save power (ref Haoyu datasheet) */
+	buf = PCF8563_TMRC_1_60;
+	err = pcf8563_write_block_data(client, PCF8563_REG_TMRC, 1, &buf);
+	if (err < 0) {
+		dev_err(&client->dev, "%s: write error\n", __func__);
+		return err;
+	}
+
+	err = pcf8563_get_alarm_mode(client, NULL, &alm_pending);
+	if (err < 0) {
+		dev_err(&client->dev, "%s: read error\n", __func__);
+		return err;
+	}
+	if (alm_pending)
+		pcf8563_set_alarm_mode(client, 0);
+
 	pcf8563->rtc = devm_rtc_device_register(&client->dev,
 				pcf8563_driver.driver.name,
 				&pcf8563_rtc_ops, THIS_MODULE);
@@ -435,6 +469,9 @@ static int pcf8563_probe(struct i2c_client *client,
 
 	}
 
+	/* the pcf8563 alarm only supports a minute accuracy */
+	pcf8563->rtc->uie_unsupported = 1;
+
 	return 0;
 }
 

+ 51 - 15
drivers/rtc/rtc-sirfsoc.c

@@ -47,6 +47,7 @@ struct sirfsoc_rtc_drv {
 	unsigned		irq_wake;
 	/* Overflow for every 8 years extra time */
 	u32			overflow_rtc;
+	spinlock_t		lock;
 #ifdef CONFIG_PM
 	u32		saved_counter;
 	u32		saved_overflow_rtc;
@@ -61,7 +62,7 @@ static int sirfsoc_rtc_read_alarm(struct device *dev,
 
 	rtcdrv = dev_get_drvdata(dev);
 
-	local_irq_disable();
+	spin_lock_irq(&rtcdrv->lock);
 
 	rtc_count = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN);
 
@@ -84,7 +85,8 @@ static int sirfsoc_rtc_read_alarm(struct device *dev,
 	if (sirfsoc_rtc_iobrg_readl(
 			rtcdrv->rtc_base + RTC_STATUS) & SIRFSOC_RTC_AL0E)
 		alrm->enabled = 1;
-	local_irq_enable();
+
+	spin_unlock_irq(&rtcdrv->lock);
 
 	return 0;
 }
@@ -99,7 +101,7 @@ static int sirfsoc_rtc_set_alarm(struct device *dev,
 	if (alrm->enabled) {
 		rtc_tm_to_time(&(alrm->time), &rtc_alarm);
 
-		local_irq_disable();
+		spin_lock_irq(&rtcdrv->lock);
 
 		rtc_status_reg = sirfsoc_rtc_iobrg_readl(
 				rtcdrv->rtc_base + RTC_STATUS);
@@ -123,14 +125,15 @@ static int sirfsoc_rtc_set_alarm(struct device *dev,
 		rtc_status_reg |= SIRFSOC_RTC_AL0E;
 		sirfsoc_rtc_iobrg_writel(
 			rtc_status_reg, rtcdrv->rtc_base + RTC_STATUS);
-		local_irq_enable();
+
+		spin_unlock_irq(&rtcdrv->lock);
 	} else {
 		/*
 		 * if this function was called with enabled=0
 		 * then it could mean that the application is
 		 * trying to cancel an ongoing alarm
 		 */
-		local_irq_disable();
+		spin_lock_irq(&rtcdrv->lock);
 
 		rtc_status_reg = sirfsoc_rtc_iobrg_readl(
 				rtcdrv->rtc_base + RTC_STATUS);
@@ -146,7 +149,7 @@ static int sirfsoc_rtc_set_alarm(struct device *dev,
 					rtcdrv->rtc_base + RTC_STATUS);
 		}
 
-		local_irq_enable();
+		spin_unlock_irq(&rtcdrv->lock);
 	}
 
 	return 0;
@@ -209,12 +212,38 @@ static int sirfsoc_rtc_ioctl(struct device *dev, unsigned int cmd,
 	}
 }
 
+static int sirfsoc_rtc_alarm_irq_enable(struct device *dev,
+		unsigned int enabled)
+{
+	unsigned long rtc_status_reg = 0x0;
+	struct sirfsoc_rtc_drv *rtcdrv;
+
+	rtcdrv = dev_get_drvdata(dev);
+
+	spin_lock_irq(&rtcdrv->lock);
+
+	rtc_status_reg = sirfsoc_rtc_iobrg_readl(
+				rtcdrv->rtc_base + RTC_STATUS);
+	if (enabled)
+		rtc_status_reg |= SIRFSOC_RTC_AL0E;
+	else
+		rtc_status_reg &= ~SIRFSOC_RTC_AL0E;
+
+	sirfsoc_rtc_iobrg_writel(rtc_status_reg, rtcdrv->rtc_base + RTC_STATUS);
+
+	spin_unlock_irq(&rtcdrv->lock);
+
+	return 0;
+
+}
+
 static const struct rtc_class_ops sirfsoc_rtc_ops = {
 	.read_time = sirfsoc_rtc_read_time,
 	.set_time = sirfsoc_rtc_set_time,
 	.read_alarm = sirfsoc_rtc_read_alarm,
 	.set_alarm = sirfsoc_rtc_set_alarm,
-	.ioctl = sirfsoc_rtc_ioctl
+	.ioctl = sirfsoc_rtc_ioctl,
+	.alarm_irq_enable = sirfsoc_rtc_alarm_irq_enable
 };
 
 static irqreturn_t sirfsoc_rtc_irq_handler(int irq, void *pdata)
@@ -223,6 +252,8 @@ static irqreturn_t sirfsoc_rtc_irq_handler(int irq, void *pdata)
 	unsigned long rtc_status_reg = 0x0;
 	unsigned long events = 0x0;
 
+	spin_lock(&rtcdrv->lock);
+
 	rtc_status_reg = sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_STATUS);
 	/* this bit will be set ONLY if an alarm was active
 	 * and it expired NOW
@@ -240,6 +271,9 @@ static irqreturn_t sirfsoc_rtc_irq_handler(int irq, void *pdata)
 		rtc_status_reg &= ~(SIRFSOC_RTC_AL0E);
 	}
 	sirfsoc_rtc_iobrg_writel(rtc_status_reg, rtcdrv->rtc_base + RTC_STATUS);
+
+	spin_unlock(&rtcdrv->lock);
+
 	/* this should wake up any apps polling/waiting on the read
 	 * after setting the alarm
 	 */
@@ -267,6 +301,8 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev)
 	if (rtcdrv == NULL)
 		return -ENOMEM;
 
+	spin_lock_init(&rtcdrv->lock);
+
 	err = of_property_read_u32(np, "reg", &rtcdrv->rtc_base);
 	if (err) {
 		dev_err(&pdev->dev, "unable to find base address of rtc node in dtb\n");
@@ -286,14 +322,6 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev)
 	rtc_div = ((32768 / RTC_HZ) / 2) - 1;
 	sirfsoc_rtc_iobrg_writel(rtc_div, rtcdrv->rtc_base + RTC_DIV);
 
-	rtcdrv->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
-			&sirfsoc_rtc_ops, THIS_MODULE);
-	if (IS_ERR(rtcdrv->rtc)) {
-		err = PTR_ERR(rtcdrv->rtc);
-		dev_err(&pdev->dev, "can't register RTC device\n");
-		return err;
-	}
-
 	/* 0x3 -> RTC_CLK */
 	sirfsoc_rtc_iobrg_writel(SIRFSOC_RTC_CLK,
 			rtcdrv->rtc_base + RTC_CLOCK_SWITCH);
@@ -308,6 +336,14 @@ static int sirfsoc_rtc_probe(struct platform_device *pdev)
 	rtcdrv->overflow_rtc =
 		sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_SW_VALUE);
 
+	rtcdrv->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+			&sirfsoc_rtc_ops, THIS_MODULE);
+	if (IS_ERR(rtcdrv->rtc)) {
+		err = PTR_ERR(rtcdrv->rtc);
+		dev_err(&pdev->dev, "can't register RTC device\n");
+		return err;
+	}
+
 	rtcdrv->irq = platform_get_irq(pdev, 0);
 	err = devm_request_irq(
 			&pdev->dev,

+ 36 - 3
drivers/rtc/rtc-snvs.c

@@ -17,6 +17,7 @@
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/rtc.h>
+#include <linux/clk.h>
 
 /* These register offsets are relative to LP (Low Power) range */
 #define SNVS_LPCR		0x04
@@ -39,6 +40,7 @@ struct snvs_rtc_data {
 	void __iomem *ioaddr;
 	int irq;
 	spinlock_t lock;
+	struct clk *clk;
 };
 
 static u32 rtc_read_lp_counter(void __iomem *ioaddr)
@@ -260,6 +262,18 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	if (data->irq < 0)
 		return data->irq;
 
+	data->clk = devm_clk_get(&pdev->dev, "snvs-rtc");
+	if (IS_ERR(data->clk)) {
+		data->clk = NULL;
+	} else {
+		ret = clk_prepare_enable(data->clk);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Could not prepare or enable the snvs clock\n");
+			return ret;
+		}
+	}
+
 	platform_set_drvdata(pdev, data);
 
 	spin_lock_init(&data->lock);
@@ -280,7 +294,7 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&pdev->dev, "failed to request irq %d: %d\n",
 			data->irq, ret);
-		return ret;
+		goto error_rtc_device_register;
 	}
 
 	data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
@@ -288,10 +302,16 @@ static int snvs_rtc_probe(struct platform_device *pdev)
 	if (IS_ERR(data->rtc)) {
 		ret = PTR_ERR(data->rtc);
 		dev_err(&pdev->dev, "failed to register rtc: %d\n", ret);
-		return ret;
+		goto error_rtc_device_register;
 	}
 
 	return 0;
+
+error_rtc_device_register:
+	if (data->clk)
+		clk_disable_unprepare(data->clk);
+
+	return ret;
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -302,21 +322,34 @@ static int snvs_rtc_suspend(struct device *dev)
 	if (device_may_wakeup(dev))
 		enable_irq_wake(data->irq);
 
+	if (data->clk)
+		clk_disable_unprepare(data->clk);
+
 	return 0;
 }
 
 static int snvs_rtc_resume(struct device *dev)
 {
 	struct snvs_rtc_data *data = dev_get_drvdata(dev);
+	int ret;
 
 	if (device_may_wakeup(dev))
 		disable_irq_wake(data->irq);
 
+	if (data->clk) {
+		ret = clk_prepare_enable(data->clk);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 #endif
 
-static SIMPLE_DEV_PM_OPS(snvs_rtc_pm_ops, snvs_rtc_suspend, snvs_rtc_resume);
+static const struct dev_pm_ops snvs_rtc_pm_ops = {
+	.suspend_noirq = snvs_rtc_suspend,
+	.resume_noirq = snvs_rtc_resume,
+};
 
 static const struct of_device_id snvs_dt_ids[] = {
 	{ .compatible = "fsl,sec-v4.0-mon-rtc-lp", },

+ 1 - 1
drivers/usb/storage/debug.c

@@ -188,7 +188,7 @@ int usb_stor_dbg(const struct us_data *us, const char *fmt, ...)
 
 	va_start(args, fmt);
 
-	r = dev_vprintk_emit(7, &us->pusb_dev->dev, fmt, args);
+	r = dev_vprintk_emit(LOGLEVEL_DEBUG, &us->pusb_dev->dev, fmt, args);
 
 	va_end(args);
 

+ 22 - 18
fs/binfmt_elf.c

@@ -1994,18 +1994,6 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
 	shdr4extnum->sh_info = segs;
 }
 
-static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
-				     unsigned long mm_flags)
-{
-	struct vm_area_struct *vma;
-	size_t size = 0;
-
-	for (vma = first_vma(current, gate_vma); vma != NULL;
-	     vma = next_vma(vma, gate_vma))
-		size += vma_dump_size(vma, mm_flags);
-	return size;
-}
-
 /*
  * Actual dumper
  *
@@ -2017,7 +2005,8 @@ static int elf_core_dump(struct coredump_params *cprm)
 {
 	int has_dumped = 0;
 	mm_segment_t fs;
-	int segs;
+	int segs, i;
+	size_t vma_data_size = 0;
 	struct vm_area_struct *vma, *gate_vma;
 	struct elfhdr *elf = NULL;
 	loff_t offset = 0, dataoff;
@@ -2026,6 +2015,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	struct elf_shdr *shdr4extnum = NULL;
 	Elf_Half e_phnum;
 	elf_addr_t e_shoff;
+	elf_addr_t *vma_filesz = NULL;
 
 	/*
 	 * We no longer stop all VM operations.
@@ -2093,7 +2083,20 @@ static int elf_core_dump(struct coredump_params *cprm)
 
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
-	offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
+	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
+	if (!vma_filesz)
+		goto end_coredump;
+
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
+			vma = next_vma(vma, gate_vma)) {
+		unsigned long dump_size;
+
+		dump_size = vma_dump_size(vma, cprm->mm_flags);
+		vma_filesz[i++] = dump_size;
+		vma_data_size += dump_size;
+	}
+
+	offset += vma_data_size;
 	offset += elf_core_extra_data_size();
 	e_shoff = offset;
 
@@ -2113,7 +2116,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		goto end_coredump;
 
 	/* Write program headers for segments dump */
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		struct elf_phdr phdr;
 
@@ -2121,7 +2124,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 		phdr.p_offset = offset;
 		phdr.p_vaddr = vma->vm_start;
 		phdr.p_paddr = 0;
-		phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
+		phdr.p_filesz = vma_filesz[i++];
 		phdr.p_memsz = vma->vm_end - vma->vm_start;
 		offset += phdr.p_filesz;
 		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
@@ -2149,12 +2152,12 @@ static int elf_core_dump(struct coredump_params *cprm)
 	if (!dump_skip(cprm, dataoff - cprm->written))
 		goto end_coredump;
 
-	for (vma = first_vma(current, gate_vma); vma != NULL;
+	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
 			vma = next_vma(vma, gate_vma)) {
 		unsigned long addr;
 		unsigned long end;
 
-		end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
+		end = vma->vm_start + vma_filesz[i++];
 
 		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
 			struct page *page;
@@ -2187,6 +2190,7 @@ end_coredump:
 cleanup:
 	free_note_info(&info);
 	kfree(shdr4extnum);
+	kfree(vma_filesz);
 	kfree(phdr4note);
 	kfree(elf);
 out:

+ 248 - 145
fs/binfmt_misc.c

@@ -1,21 +1,14 @@
 /*
- *  binfmt_misc.c
+ * binfmt_misc.c
  *
- *  Copyright (C) 1997 Richard Günther
+ * Copyright (C) 1997 Richard Günther
  *
- *  binfmt_misc detects binaries via a magic or filename extension and invokes
- *  a specified wrapper. This should obsolete binfmt_java, binfmt_em86 and
- *  binfmt_mz.
- *
- *  1997-04-25 first version
- *  [...]
- *  1997-05-19 cleanup
- *  1997-06-26 hpa: pass the real filename rather than argv[0]
- *  1997-06-30 minor cleanup
- *  1997-08-09 removed extension stripping, locking cleanup
- *  2001-02-28 AV: rewritten into something that resembles C. Original didn't.
+ * binfmt_misc detects binaries via a magic or filename extension and invokes
+ * a specified wrapper. See Documentation/binfmt_misc.txt for more details.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
@@ -30,8 +23,13 @@
 #include <linux/mount.h>
 #include <linux/syscalls.h>
 #include <linux/fs.h>
+#include <linux/uaccess.h>
 
-#include <asm/uaccess.h>
+#ifdef DEBUG
+# define USE_DEBUG 1
+#else
+# define USE_DEBUG 0
+#endif
 
 enum {
 	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
@@ -41,9 +39,9 @@ static LIST_HEAD(entries);
 static int enabled = 1;
 
 enum {Enabled, Magic};
-#define MISC_FMT_PRESERVE_ARGV0 (1<<31)
-#define MISC_FMT_OPEN_BINARY (1<<30)
-#define MISC_FMT_CREDENTIALS (1<<29)
+#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
+#define MISC_FMT_OPEN_BINARY (1 << 30)
+#define MISC_FMT_CREDENTIALS (1 << 29)
 
 typedef struct {
 	struct list_head list;
@@ -87,20 +85,24 @@ static Node *check_file(struct linux_binprm *bprm)
 	char *p = strrchr(bprm->interp, '.');
 	struct list_head *l;
 
+	/* Walk all the registered handlers. */
 	list_for_each(l, &entries) {
 		Node *e = list_entry(l, Node, list);
 		char *s;
 		int j;
 
+		/* Make sure this one is currently enabled. */
 		if (!test_bit(Enabled, &e->flags))
 			continue;
 
+		/* Do matching based on extension if applicable. */
 		if (!test_bit(Magic, &e->flags)) {
 			if (p && !strcmp(e->magic, p + 1))
 				return e;
 			continue;
 		}
 
+		/* Do matching based on magic & mask. */
 		s = bprm->buf + e->offset;
 		if (e->mask) {
 			for (j = 0; j < e->size; j++)
@@ -123,7 +125,7 @@ static Node *check_file(struct linux_binprm *bprm)
 static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
-	struct file * interp_file = NULL;
+	struct file *interp_file = NULL;
 	char iname[BINPRM_BUF_SIZE];
 	const char *iname_addr = iname;
 	int retval;
@@ -131,7 +133,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
 
 	retval = -ENOEXEC;
 	if (!enabled)
-		goto _ret;
+		goto ret;
 
 	/* to keep locking time low, we copy the interpreter string */
 	read_lock(&entries_lock);
@@ -140,25 +142,26 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
 	read_unlock(&entries_lock);
 	if (!fmt)
-		goto _ret;
+		goto ret;
 
 	if (!(fmt->flags & MISC_FMT_PRESERVE_ARGV0)) {
 		retval = remove_arg_zero(bprm);
 		if (retval)
-			goto _ret;
+			goto ret;
 	}
 
 	if (fmt->flags & MISC_FMT_OPEN_BINARY) {
 
 		/* if the binary should be opened on behalf of the
 		 * interpreter than keep it open and assign descriptor
-		 * to it */
- 		fd_binary = get_unused_fd();
- 		if (fd_binary < 0) {
- 			retval = fd_binary;
- 			goto _ret;
- 		}
- 		fd_install(fd_binary, bprm->file);
+		 * to it
+		 */
+		fd_binary = get_unused_fd_flags(0);
+		if (fd_binary < 0) {
+			retval = fd_binary;
+			goto ret;
+		}
+		fd_install(fd_binary, bprm->file);
 
 		/* if the binary is not readable than enforce mm->dumpable=0
 		   regardless of the interpreter's permissions */
@@ -171,32 +174,32 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		bprm->interp_flags |= BINPRM_FLAGS_EXECFD;
 		bprm->interp_data = fd_binary;
 
- 	} else {
- 		allow_write_access(bprm->file);
- 		fput(bprm->file);
- 		bprm->file = NULL;
- 	}
+	} else {
+		allow_write_access(bprm->file);
+		fput(bprm->file);
+		bprm->file = NULL;
+	}
 	/* make argv[1] be the path to the binary */
-	retval = copy_strings_kernel (1, &bprm->interp, bprm);
+	retval = copy_strings_kernel(1, &bprm->interp, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 	bprm->argc++;
 
 	/* add the interp as argv[0] */
-	retval = copy_strings_kernel (1, &iname_addr, bprm);
+	retval = copy_strings_kernel(1, &iname_addr, bprm);
 	if (retval < 0)
-		goto _error;
-	bprm->argc ++;
+		goto error;
+	bprm->argc++;
 
 	/* Update interp in case binfmt_script needs it. */
 	retval = bprm_change_interp(iname, bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-	interp_file = open_exec (iname);
-	retval = PTR_ERR (interp_file);
-	if (IS_ERR (interp_file))
-		goto _error;
+	interp_file = open_exec(iname);
+	retval = PTR_ERR(interp_file);
+	if (IS_ERR(interp_file))
+		goto error;
 
 	bprm->file = interp_file;
 	if (fmt->flags & MISC_FMT_CREDENTIALS) {
@@ -207,23 +210,23 @@ static int load_misc_binary(struct linux_binprm *bprm)
 		memset(bprm->buf, 0, BINPRM_BUF_SIZE);
 		retval = kernel_read(bprm->file, 0, bprm->buf, BINPRM_BUF_SIZE);
 	} else
-		retval = prepare_binprm (bprm);
+		retval = prepare_binprm(bprm);
 
 	if (retval < 0)
-		goto _error;
+		goto error;
 
 	retval = search_binary_handler(bprm);
 	if (retval < 0)
-		goto _error;
+		goto error;
 
-_ret:
+ret:
 	return retval;
-_error:
+error:
 	if (fd_binary > 0)
 		sys_close(fd_binary);
 	bprm->interp_flags = 0;
 	bprm->interp_data = 0;
-	goto _ret;
+	goto ret;
 }
 
 /* Command parsers */
@@ -250,36 +253,40 @@ static char *scanarg(char *s, char del)
 	return s;
 }
 
-static char * check_special_flags (char * sfs, Node * e)
+static char *check_special_flags(char *sfs, Node *e)
 {
-	char * p = sfs;
+	char *p = sfs;
 	int cont = 1;
 
 	/* special flags */
 	while (cont) {
 		switch (*p) {
-			case 'P':
-				p++;
-				e->flags |= MISC_FMT_PRESERVE_ARGV0;
-				break;
-			case 'O':
-				p++;
-				e->flags |= MISC_FMT_OPEN_BINARY;
-				break;
-			case 'C':
-				p++;
-				/* this flags also implies the
-				   open-binary flag */
-				e->flags |= (MISC_FMT_CREDENTIALS |
-						MISC_FMT_OPEN_BINARY);
-				break;
-			default:
-				cont = 0;
+		case 'P':
+			pr_debug("register: flag: P (preserve argv0)\n");
+			p++;
+			e->flags |= MISC_FMT_PRESERVE_ARGV0;
+			break;
+		case 'O':
+			pr_debug("register: flag: O (open binary)\n");
+			p++;
+			e->flags |= MISC_FMT_OPEN_BINARY;
+			break;
+		case 'C':
+			pr_debug("register: flag: C (preserve creds)\n");
+			p++;
+			/* this flags also implies the
+			   open-binary flag */
+			e->flags |= (MISC_FMT_CREDENTIALS |
+					MISC_FMT_OPEN_BINARY);
+			break;
+		default:
+			cont = 0;
 		}
 	}
 
 	return p;
 }
+
 /*
  * This registers a new binary format, it recognises the syntax
  * ':name:type:offset:magic:mask:interpreter:flags'
@@ -292,6 +299,8 @@ static Node *create_entry(const char __user *buffer, size_t count)
 	char *buf, *p;
 	char del;
 
+	pr_debug("register: received %zu bytes\n", count);
+
 	/* some sanity checks */
 	err = -EINVAL;
 	if ((count < 11) || (count > MAX_REGISTER_LENGTH))
@@ -299,7 +308,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	err = -ENOMEM;
 	memsize = sizeof(Node) + count + 8;
-	e = kmalloc(memsize, GFP_USER);
+	e = kmalloc(memsize, GFP_KERNEL);
 	if (!e)
 		goto out;
 
@@ -307,98 +316,175 @@ static Node *create_entry(const char __user *buffer, size_t count)
 
 	memset(e, 0, sizeof(Node));
 	if (copy_from_user(buf, buffer, count))
-		goto Efault;
+		goto efault;
 
 	del = *p++;	/* delimeter */
 
-	memset(buf+count, del, 8);
+	pr_debug("register: delim: %#x {%c}\n", del, del);
+
+	/* Pad the buffer with the delim to simplify parsing below. */
+	memset(buf + count, del, 8);
 
+	/* Parse the 'name' field. */
 	e->name = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->name[0] ||
 	    !strcmp(e->name, ".") ||
 	    !strcmp(e->name, "..") ||
 	    strchr(e->name, '/'))
-		goto Einval;
+		goto einval;
+
+	pr_debug("register: name: {%s}\n", e->name);
+
+	/* Parse the 'type' field. */
 	switch (*p++) {
-		case 'E': e->flags = 1<<Enabled; break;
-		case 'M': e->flags = (1<<Enabled) | (1<<Magic); break;
-		default: goto Einval;
+	case 'E':
+		pr_debug("register: type: E (extension)\n");
+		e->flags = 1 << Enabled;
+		break;
+	case 'M':
+		pr_debug("register: type: M (magic)\n");
+		e->flags = (1 << Enabled) | (1 << Magic);
+		break;
+	default:
+		goto einval;
 	}
 	if (*p++ != del)
-		goto Einval;
+		goto einval;
+
 	if (test_bit(Magic, &e->flags)) {
-		char *s = strchr(p, del);
+		/* Handle the 'M' (magic) format. */
+		char *s;
+
+		/* Parse the 'offset' field. */
+		s = strchr(p, del);
 		if (!s)
-			goto Einval;
+			goto einval;
 		*s++ = '\0';
 		e->offset = simple_strtoul(p, &p, 10);
 		if (*p++)
-			goto Einval;
+			goto einval;
+		pr_debug("register: offset: %#x\n", e->offset);
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
-		if (!e->magic[0])
-			goto Einval;
+		if (p == e->magic)
+			goto einval;
+		if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[raw]: ",
+				DUMP_PREFIX_NONE, e->magic, p - e->magic);
+
+		/* Parse the 'mask' field. */
 		e->mask = p;
 		p = scanarg(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		p[-1] = '\0';
-		if (!e->mask[0])
+		if (p == e->mask) {
 			e->mask = NULL;
+			pr_debug("register:  mask[raw]: none\n");
+		} else if (USE_DEBUG)
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register:  mask[raw]: ",
+				DUMP_PREFIX_NONE, e->mask, p - e->mask);
+
+		/*
+		 * Decode the magic & mask fields.
+		 * Note: while we might have accepted embedded NUL bytes from
+		 * above, the unescape helpers here will stop at the first one
+		 * it encounters.
+		 */
 		e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
 		if (e->mask &&
 		    string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
-			goto Einval;
+			goto einval;
 		if (e->size + e->offset > BINPRM_BUF_SIZE)
-			goto Einval;
+			goto einval;
+		pr_debug("register: magic/mask length: %i\n", e->size);
+		if (USE_DEBUG) {
+			print_hex_dump_bytes(
+				KBUILD_MODNAME ": register: magic[decoded]: ",
+				DUMP_PREFIX_NONE, e->magic, e->size);
+
+			if (e->mask) {
+				int i;
+				char *masked = kmalloc(e->size, GFP_KERNEL);
+
+				print_hex_dump_bytes(
+					KBUILD_MODNAME ": register:  mask[decoded]: ",
+					DUMP_PREFIX_NONE, e->mask, e->size);
+
+				if (masked) {
+					for (i = 0; i < e->size; ++i)
+						masked[i] = e->magic[i] & e->mask[i];
+					print_hex_dump_bytes(
+						KBUILD_MODNAME ": register:  magic[masked]: ",
+						DUMP_PREFIX_NONE, masked, e->size);
+
+					kfree(masked);
+				}
+			}
+		}
 	} else {
+		/* Handle the 'E' (extension) format. */
+
+		/* Skip the 'offset' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
+
+		/* Parse the 'magic' field. */
 		e->magic = p;
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 		if (!e->magic[0] || strchr(e->magic, '/'))
-			goto Einval;
+			goto einval;
+		pr_debug("register: extension: {%s}\n", e->magic);
+
+		/* Skip the 'mask' field. */
 		p = strchr(p, del);
 		if (!p)
-			goto Einval;
+			goto einval;
 		*p++ = '\0';
 	}
+
+	/* Parse the 'interpreter' field. */
 	e->interpreter = p;
 	p = strchr(p, del);
 	if (!p)
-		goto Einval;
+		goto einval;
 	*p++ = '\0';
 	if (!e->interpreter[0])
-		goto Einval;
-
-
-	p = check_special_flags (p, e);
+		goto einval;
+	pr_debug("register: interpreter: {%s}\n", e->interpreter);
 
+	/* Parse the 'flags' field. */
+	p = check_special_flags(p, e);
 	if (*p == '\n')
 		p++;
 	if (p != buf + count)
-		goto Einval;
+		goto einval;
+
 	return e;
 
 out:
 	return ERR_PTR(err);
 
-Efault:
+efault:
 	kfree(e);
 	return ERR_PTR(-EFAULT);
-Einval:
+einval:
 	kfree(e);
 	return ERR_PTR(-EINVAL);
 }
@@ -417,7 +503,7 @@ static int parse_command(const char __user *buffer, size_t count)
 		return -EFAULT;
 	if (!count)
 		return 0;
-	if (s[count-1] == '\n')
+	if (s[count - 1] == '\n')
 		count--;
 	if (count == 1 && s[0] == '0')
 		return 1;
@@ -434,7 +520,7 @@ static void entry_status(Node *e, char *page)
 {
 	char *dp;
 	char *status = "disabled";
-	const char * flags = "flags: ";
+	const char *flags = "flags: ";
 
 	if (test_bit(Enabled, &e->flags))
 		status = "enabled";
@@ -448,19 +534,15 @@ static void entry_status(Node *e, char *page)
 	dp = page + strlen(page);
 
 	/* print the special flags */
-	sprintf (dp, "%s", flags);
-	dp += strlen (flags);
-	if (e->flags & MISC_FMT_PRESERVE_ARGV0) {
-		*dp ++ = 'P';
-	}
-	if (e->flags & MISC_FMT_OPEN_BINARY) {
-		*dp ++ = 'O';
-	}
-	if (e->flags & MISC_FMT_CREDENTIALS) {
-		*dp ++ = 'C';
-	}
-	*dp ++ = '\n';
-
+	sprintf(dp, "%s", flags);
+	dp += strlen(flags);
+	if (e->flags & MISC_FMT_PRESERVE_ARGV0)
+		*dp++ = 'P';
+	if (e->flags & MISC_FMT_OPEN_BINARY)
+		*dp++ = 'O';
+	if (e->flags & MISC_FMT_CREDENTIALS)
+		*dp++ = 'C';
+	*dp++ = '\n';
 
 	if (!test_bit(Magic, &e->flags)) {
 		sprintf(dp, "extension .%s\n", e->magic);
@@ -488,7 +570,7 @@ static void entry_status(Node *e, char *page)
 
 static struct inode *bm_get_inode(struct super_block *sb, int mode)
 {
-	struct inode * inode = new_inode(sb);
+	struct inode *inode = new_inode(sb);
 
 	if (inode) {
 		inode->i_ino = get_next_ino();
@@ -528,13 +610,14 @@ static void kill_node(Node *e)
 /* /<entry> */
 
 static ssize_t
-bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
+bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 {
 	Node *e = file_inode(file)->i_private;
 	ssize_t res;
 	char *page;
 
-	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+	page = (char *) __get_free_page(GFP_KERNEL);
+	if (!page)
 		return -ENOMEM;
 
 	entry_status(e, page);
@@ -553,20 +636,28 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
 	int res = parse_command(buffer, count);
 
 	switch (res) {
-		case 1: clear_bit(Enabled, &e->flags);
-			break;
-		case 2: set_bit(Enabled, &e->flags);
-			break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
-
-			kill_node(e);
-
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+	case 1:
+		/* Disable this handler. */
+		clear_bit(Enabled, &e->flags);
+		break;
+	case 2:
+		/* Enable this handler. */
+		set_bit(Enabled, &e->flags);
+		break;
+	case 3:
+		/* Delete this handler. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
+
+		kill_node(e);
+
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -654,26 +745,36 @@ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
 }
 
-static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+static ssize_t bm_status_write(struct file *file, const char __user *buffer,
 		size_t count, loff_t *ppos)
 {
 	int res = parse_command(buffer, count);
 	struct dentry *root;
 
 	switch (res) {
-		case 1: enabled = 0; break;
-		case 2: enabled = 1; break;
-		case 3: root = dget(file->f_path.dentry->d_sb->s_root);
-			mutex_lock(&root->d_inode->i_mutex);
-
-			while (!list_empty(&entries))
-				kill_node(list_entry(entries.next, Node, list));
-
-			mutex_unlock(&root->d_inode->i_mutex);
-			dput(root);
-			break;
-		default: return res;
+	case 1:
+		/* Disable all handlers. */
+		enabled = 0;
+		break;
+	case 2:
+		/* Enable all handlers. */
+		enabled = 1;
+		break;
+	case 3:
+		/* Delete all handlers. */
+		root = dget(file->f_path.dentry->d_sb->s_root);
+		mutex_lock(&root->d_inode->i_mutex);
+
+		while (!list_empty(&entries))
+			kill_node(list_entry(entries.next, Node, list));
+
+		mutex_unlock(&root->d_inode->i_mutex);
+		dput(root);
+		break;
+	default:
+		return res;
 	}
+
 	return count;
 }
 
@@ -690,14 +791,16 @@ static const struct super_operations s_ops = {
 	.evict_inode	= bm_evict_inode,
 };
 
-static int bm_fill_super(struct super_block * sb, void * data, int silent)
+static int bm_fill_super(struct super_block *sb, void *data, int silent)
 {
+	int err;
 	static struct tree_descr bm_files[] = {
 		[2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
 		[3] = {"register", &bm_register_operations, S_IWUSR},
 		/* last one */ {""}
 	};
-	int err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
+
+	err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
 	if (!err)
 		sb->s_op = &s_ops;
 	return err;

+ 0 - 1
fs/char_dev.c

@@ -117,7 +117,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 			goto out;
 		}
 		major = i;
-		ret = major;
 	}
 
 	cd->major = major;

+ 1 - 1
fs/cifs/cifsacl.c

@@ -38,7 +38,7 @@ static const struct cifs_sid sid_everyone = {
 	1, 1, {0, 0, 0, 0, 0, 1}, {0} };
 /* security id for Authenticated Users system group */
 static const struct cifs_sid sid_authusers = {
-	1, 1, {0, 0, 0, 0, 0, 5}, {__constant_cpu_to_le32(11)} };
+	1, 1, {0, 0, 0, 0, 0, 5}, {cpu_to_le32(11)} };
 /* group users */
 static const struct cifs_sid sid_user = {1, 2 , {0, 0, 0, 0, 0, 5}, {} };
 

+ 10 - 10
fs/cifs/cifssmb.c

@@ -2477,14 +2477,14 @@ CIFSSMBPosixLock(const unsigned int xid, struct cifs_tcon *tcon,
 		}
 		parm_data = (struct cifs_posix_lock *)
 			((char *)&pSMBr->hdr.Protocol + data_offset);
-		if (parm_data->lock_type == __constant_cpu_to_le16(CIFS_UNLCK))
+		if (parm_data->lock_type == cpu_to_le16(CIFS_UNLCK))
 			pLockData->fl_type = F_UNLCK;
 		else {
 			if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_RDLCK))
+					cpu_to_le16(CIFS_RDLCK))
 				pLockData->fl_type = F_RDLCK;
 			else if (parm_data->lock_type ==
-					__constant_cpu_to_le16(CIFS_WRLCK))
+					cpu_to_le16(CIFS_WRLCK))
 				pLockData->fl_type = F_WRLCK;
 
 			pLockData->fl_start = le64_to_cpu(parm_data->start);
@@ -3276,25 +3276,25 @@ CIFSSMB_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	pSMB->compression_state = cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	pSMB->TotalParameterCount = 0;
-	pSMB->TotalDataCount = __constant_cpu_to_le32(2);
+	pSMB->TotalDataCount = cpu_to_le32(2);
 	pSMB->MaxParameterCount = 0;
 	pSMB->MaxDataCount = 0;
 	pSMB->MaxSetupCount = 4;
 	pSMB->Reserved = 0;
 	pSMB->ParameterOffset = 0;
-	pSMB->DataCount = __constant_cpu_to_le32(2);
+	pSMB->DataCount = cpu_to_le32(2);
 	pSMB->DataOffset =
 		cpu_to_le32(offsetof(struct smb_com_transaction_compr_ioctl_req,
 				compression_state) - 4);  /* 84 */
 	pSMB->SetupCount = 4;
-	pSMB->SubCommand = __constant_cpu_to_le16(NT_TRANSACT_IOCTL);
+	pSMB->SubCommand = cpu_to_le16(NT_TRANSACT_IOCTL);
 	pSMB->ParameterCount = 0;
-	pSMB->FunctionCode = __constant_cpu_to_le32(FSCTL_SET_COMPRESSION);
+	pSMB->FunctionCode = cpu_to_le32(FSCTL_SET_COMPRESSION);
 	pSMB->IsFsctl = 1; /* FSCTL */
 	pSMB->IsRootFlag = 0;
 	pSMB->Fid = fid; /* file handle always le */
 	/* 3 byte pad, followed by 2 byte compress state */
-	pSMB->ByteCount = __constant_cpu_to_le16(5);
+	pSMB->ByteCount = cpu_to_le16(5);
 	inc_rfc1001_len(pSMB, 5);
 
 	rc = SendReceive(xid, tcon->ses, (struct smb_hdr *) pSMB,
@@ -3430,10 +3430,10 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
 	cifs_acl->version = cpu_to_le16(1);
 	if (acl_type == ACL_TYPE_ACCESS) {
 		cifs_acl->access_entry_count = cpu_to_le16(count);
-		cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->default_entry_count = cpu_to_le16(0xFFFF);
 	} else if (acl_type == ACL_TYPE_DEFAULT) {
 		cifs_acl->default_entry_count = cpu_to_le16(count);
-		cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+		cifs_acl->access_entry_count = cpu_to_le16(0xFFFF);
 	} else {
 		cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
 		return 0;

+ 2 - 2
fs/cifs/file.c

@@ -1066,7 +1066,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;
@@ -1401,7 +1401,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 
 	max_num = (max_buf - sizeof(struct smb_hdr)) /
 						sizeof(LOCKING_ANDX_RANGE);
-	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 

+ 1 - 1
fs/cifs/sess.c

@@ -46,7 +46,7 @@ static __u32 cifs_ssetup_hdr(struct cifs_ses *ses, SESSION_SETUP_ANDX *pSMB)
 					CIFSMaxBufSize + MAX_CIFS_HDR_SIZE - 4,
 					USHRT_MAX));
 	pSMB->req.MaxMpxCount = cpu_to_le16(ses->server->maxReq);
-	pSMB->req.VcNumber = __constant_cpu_to_le16(1);
+	pSMB->req.VcNumber = cpu_to_le16(1);
 
 	/* Now no need to set SMBFLG_CASELESS or obsolete CANONICAL PATH */
 

+ 2 - 2
fs/cifs/smb2file.c

@@ -111,7 +111,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
 		return -EINVAL;
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
@@ -247,7 +247,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
 	}
 
 	max_num = max_buf / sizeof(struct smb2_lock_element);
-	buf = kzalloc(max_num * sizeof(struct smb2_lock_element), GFP_KERNEL);
+	buf = kcalloc(max_num, sizeof(struct smb2_lock_element), GFP_KERNEL);
 	if (!buf) {
 		free_xid(xid);
 		return -ENOMEM;

+ 19 - 19
fs/cifs/smb2misc.c

@@ -67,27 +67,27 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
  *  indexed by command in host byte order
  */
 static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
-	/* SMB2_NEGOTIATE */ __constant_cpu_to_le16(65),
-	/* SMB2_SESSION_SETUP */ __constant_cpu_to_le16(9),
-	/* SMB2_LOGOFF */ __constant_cpu_to_le16(4),
-	/* SMB2_TREE_CONNECT */ __constant_cpu_to_le16(16),
-	/* SMB2_TREE_DISCONNECT */ __constant_cpu_to_le16(4),
-	/* SMB2_CREATE */ __constant_cpu_to_le16(89),
-	/* SMB2_CLOSE */ __constant_cpu_to_le16(60),
-	/* SMB2_FLUSH */ __constant_cpu_to_le16(4),
-	/* SMB2_READ */ __constant_cpu_to_le16(17),
-	/* SMB2_WRITE */ __constant_cpu_to_le16(17),
-	/* SMB2_LOCK */ __constant_cpu_to_le16(4),
-	/* SMB2_IOCTL */ __constant_cpu_to_le16(49),
+	/* SMB2_NEGOTIATE */ cpu_to_le16(65),
+	/* SMB2_SESSION_SETUP */ cpu_to_le16(9),
+	/* SMB2_LOGOFF */ cpu_to_le16(4),
+	/* SMB2_TREE_CONNECT */ cpu_to_le16(16),
+	/* SMB2_TREE_DISCONNECT */ cpu_to_le16(4),
+	/* SMB2_CREATE */ cpu_to_le16(89),
+	/* SMB2_CLOSE */ cpu_to_le16(60),
+	/* SMB2_FLUSH */ cpu_to_le16(4),
+	/* SMB2_READ */ cpu_to_le16(17),
+	/* SMB2_WRITE */ cpu_to_le16(17),
+	/* SMB2_LOCK */ cpu_to_le16(4),
+	/* SMB2_IOCTL */ cpu_to_le16(49),
 	/* BB CHECK this ... not listed in documentation */
-	/* SMB2_CANCEL */ __constant_cpu_to_le16(0),
-	/* SMB2_ECHO */ __constant_cpu_to_le16(4),
-	/* SMB2_QUERY_DIRECTORY */ __constant_cpu_to_le16(9),
-	/* SMB2_CHANGE_NOTIFY */ __constant_cpu_to_le16(9),
-	/* SMB2_QUERY_INFO */ __constant_cpu_to_le16(9),
-	/* SMB2_SET_INFO */ __constant_cpu_to_le16(2),
+	/* SMB2_CANCEL */ cpu_to_le16(0),
+	/* SMB2_ECHO */ cpu_to_le16(4),
+	/* SMB2_QUERY_DIRECTORY */ cpu_to_le16(9),
+	/* SMB2_CHANGE_NOTIFY */ cpu_to_le16(9),
+	/* SMB2_QUERY_INFO */ cpu_to_le16(9),
+	/* SMB2_SET_INFO */ cpu_to_le16(2),
 	/* BB FIXME can also be 44 for lease break */
-	/* SMB2_OPLOCK_BREAK */ __constant_cpu_to_le16(24)
+	/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
 int

+ 1 - 1
fs/cifs/smb2ops.c

@@ -600,7 +600,7 @@ smb2_clone_range(const unsigned int xid,
 		goto cchunk_out;
 
 	/* For now array only one chunk long, will make more flexible later */
-	pcchunk->ChunkCount = __constant_cpu_to_le32(1);
+	pcchunk->ChunkCount = cpu_to_le32(1);
 	pcchunk->Reserved = 0;
 	pcchunk->Reserved2 = 0;
 

+ 1 - 1
fs/cifs/smb2pdu.c

@@ -1358,7 +1358,7 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
 	char *ret_data = NULL;
 
 	fsctl_input.CompressionState =
-			__constant_cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
+			cpu_to_le16(COMPRESSION_FORMAT_DEFAULT);
 
 	rc = SMB2_ioctl(xid, tcon, persistent_fid, volatile_fid,
 			FSCTL_SET_COMPRESSION, true /* is_fsctl */,

+ 14 - 14
fs/cifs/smb2pdu.h

@@ -85,7 +85,7 @@
 /* BB FIXME - analyze following length BB */
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
-#define SMB2_PROTO_NUMBER __constant_cpu_to_le32(0x424d53fe)
+#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
 
 /*
  * SMB2 Header Definition
@@ -96,7 +96,7 @@
  *
  */
 
-#define SMB2_HEADER_STRUCTURE_SIZE __constant_cpu_to_le16(64)
+#define SMB2_HEADER_STRUCTURE_SIZE cpu_to_le16(64)
 
 struct smb2_hdr {
 	__be32 smb2_buf_length;	/* big endian on wire */
@@ -137,16 +137,16 @@ struct smb2_transform_hdr {
 } __packed;
 
 /* Encryption Algorithms */
-#define SMB2_ENCRYPTION_AES128_CCM	__constant_cpu_to_le16(0x0001)
+#define SMB2_ENCRYPTION_AES128_CCM	cpu_to_le16(0x0001)
 
 /*
  *	SMB2 flag definitions
  */
-#define SMB2_FLAGS_SERVER_TO_REDIR	__constant_cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND	__constant_cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS	__constant_cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED		__constant_cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS	__constant_cpu_to_le32(0x10000000)
+#define SMB2_FLAGS_SERVER_TO_REDIR	cpu_to_le32(0x00000001)
+#define SMB2_FLAGS_ASYNC_COMMAND	cpu_to_le32(0x00000002)
+#define SMB2_FLAGS_RELATED_OPERATIONS	cpu_to_le32(0x00000004)
+#define SMB2_FLAGS_SIGNED		cpu_to_le32(0x00000008)
+#define SMB2_FLAGS_DFS_OPERATIONS	cpu_to_le32(0x10000000)
 
 /*
  *	Definitions for SMB2 Protocol Data Units (network frames)
@@ -157,7 +157,7 @@ struct smb2_transform_hdr {
  *
  */
 
-#define SMB2_ERROR_STRUCTURE_SIZE2 __constant_cpu_to_le16(9)
+#define SMB2_ERROR_STRUCTURE_SIZE2 cpu_to_le16(9)
 
 struct smb2_err_rsp {
 	struct smb2_hdr hdr;
@@ -502,12 +502,12 @@ struct create_context {
 #define SMB2_LEASE_HANDLE_CACHING_HE	0x02
 #define SMB2_LEASE_WRITE_CACHING_HE	0x04
 
-#define SMB2_LEASE_NONE			__constant_cpu_to_le32(0x00)
-#define SMB2_LEASE_READ_CACHING		__constant_cpu_to_le32(0x01)
-#define SMB2_LEASE_HANDLE_CACHING	__constant_cpu_to_le32(0x02)
-#define SMB2_LEASE_WRITE_CACHING	__constant_cpu_to_le32(0x04)
+#define SMB2_LEASE_NONE			cpu_to_le32(0x00)
+#define SMB2_LEASE_READ_CACHING		cpu_to_le32(0x01)
+#define SMB2_LEASE_HANDLE_CACHING	cpu_to_le32(0x02)
+#define SMB2_LEASE_WRITE_CACHING	cpu_to_le32(0x04)
 
-#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS __constant_cpu_to_le32(0x02)
+#define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS cpu_to_le32(0x02)
 
 #define SMB2_LEASE_KEY_SIZE 16
 

+ 1 - 1
fs/file.c

@@ -869,7 +869,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 	struct file *file = fget_raw(fildes);
 
 	if (file) {
-		ret = get_unused_fd();
+		ret = get_unused_fd_flags(0);
 		if (ret >= 0)
 			fd_install(ret, file);
 		else

+ 8 - 6
fs/hfs/catalog.c

@@ -162,14 +162,16 @@ err2:
  */
 int hfs_cat_keycmp(const btree_key *key1, const btree_key *key2)
 {
-	int retval;
+	__be32 k1p, k2p;
 
-	retval = be32_to_cpu(key1->cat.ParID) - be32_to_cpu(key2->cat.ParID);
-	if (!retval)
-		retval = hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
-				    key2->cat.CName.name, key2->cat.CName.len);
+	k1p = key1->cat.ParID;
+	k2p = key2->cat.ParID;
 
-	return retval;
+	if (k1p != k2p)
+		return be32_to_cpu(k1p) < be32_to_cpu(k2p) ? -1 : 1;
+
+	return hfs_strcmp(key1->cat.CName.name, key1->cat.CName.len,
+			  key2->cat.CName.name, key2->cat.CName.len);
 }
 
 /* Try to get a catalog entry for given catalog id */

+ 0 - 1
fs/ncpfs/ioctl.c

@@ -447,7 +447,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
 						result = -EIO;
 					}
 				}
-				result = 0;
 			}
 			mutex_unlock(&server->root_setup_lock);
 

+ 2 - 8
fs/nilfs2/file.c

@@ -39,21 +39,15 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	 */
 	struct the_nilfs *nilfs;
 	struct inode *inode = file->f_mapping->host;
-	int err;
-
-	err = filemap_write_and_wait_range(inode->i_mapping, start, end);
-	if (err)
-		return err;
-	mutex_lock(&inode->i_mutex);
+	int err = 0;
 
 	if (nilfs_inode_dirty(inode)) {
 		if (datasync)
 			err = nilfs_construct_dsync_segment(inode->i_sb, inode,
-							    0, LLONG_MAX);
+							    start, end);
 		else
 			err = nilfs_construct_segment(inode->i_sb);
 	}
-	mutex_unlock(&inode->i_mutex);
 
 	nilfs = inode->i_sb->s_fs_info;
 	if (!err)

+ 24 - 8
fs/nilfs2/inode.c

@@ -49,6 +49,8 @@ struct nilfs_iget_args {
 	int for_gc;
 };
 
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
 void nilfs_inode_add_blocks(struct inode *inode, int n)
 {
 	struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -348,6 +350,17 @@ const struct address_space_operations nilfs_aops = {
 	.is_partially_uptodate  = block_is_partially_uptodate,
 };
 
+static int nilfs_insert_inode_locked(struct inode *inode,
+				     struct nilfs_root *root,
+				     unsigned long ino)
+{
+	struct nilfs_iget_args args = {
+		.ino = ino, .root = root, .cno = 0, .for_gc = 0
+	};
+
+	return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
 struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 {
 	struct super_block *sb = dir->i_sb;
@@ -383,7 +396,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		err = nilfs_bmap_read(ii->i_bmap, NULL);
 		if (err < 0)
-			goto failed_bmap;
+			goto failed_after_creation;
 
 		set_bit(NILFS_I_BMAP, &ii->i_state);
 		/* No lock is needed; iget() ensures it. */
@@ -399,21 +412,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
 	spin_lock(&nilfs->ns_next_gen_lock);
 	inode->i_generation = nilfs->ns_next_generation++;
 	spin_unlock(&nilfs->ns_next_gen_lock);
-	insert_inode_hash(inode);
+	if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+		err = -EIO;
+		goto failed_after_creation;
+	}
 
 	err = nilfs_init_acl(inode, dir);
 	if (unlikely(err))
-		goto failed_acl; /* never occur. When supporting
+		goto failed_after_creation; /* never occur. When supporting
 				    nilfs_init_acl(), proper cancellation of
 				    above jobs should be considered */
 
 	return inode;
 
- failed_acl:
- failed_bmap:
+ failed_after_creation:
 	clear_nlink(inode);
+	unlock_new_inode(inode);
 	iput(inode);  /* raw_inode will be deleted through
-			 generic_delete_inode() */
+			 nilfs_evict_inode() */
 	goto failed;
 
  failed_ifile_create_inode:
@@ -461,8 +477,8 @@ int nilfs_read_inode_common(struct inode *inode,
 	inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
 	inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
 	inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-	if (inode->i_nlink == 0 && inode->i_mode == 0)
-		return -EINVAL; /* this inode is deleted */
+	if (inode->i_nlink == 0)
+		return -ESTALE; /* this inode is deleted */
 
 	inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
 	ii->i_flags = le32_to_cpu(raw_inode->i_flags);

+ 12 - 3
fs/nilfs2/namei.c

@@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
 	int err = nilfs_add_link(dentry, inode);
 	if (!err) {
 		d_instantiate(dentry, inode);
+		unlock_new_inode(inode);
 		return 0;
 	}
 	inode_dec_link_count(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	return err;
 }
@@ -182,6 +184,7 @@ out:
 out_fail:
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 	goto out;
 }
@@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 	inode_inc_link_count(inode);
 	ihold(inode);
 
-	err = nilfs_add_nondir(dentry, inode);
-	if (!err)
+	err = nilfs_add_link(dentry, inode);
+	if (!err) {
+		d_instantiate(dentry, inode);
 		err = nilfs_transaction_commit(dir->i_sb);
-	else
+	} else {
+		inode_dec_link_count(inode);
+		iput(inode);
 		nilfs_transaction_abort(dir->i_sb);
+	}
 
 	return err;
 }
@@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 
 	nilfs_mark_inode_dirty(inode);
 	d_instantiate(dentry, inode);
+	unlock_new_inode(inode);
 out:
 	if (!err)
 		err = nilfs_transaction_commit(dir->i_sb);
@@ -255,6 +263,7 @@ out_fail:
 	drop_nlink(inode);
 	drop_nlink(inode);
 	nilfs_mark_inode_dirty(inode);
+	unlock_new_inode(inode);
 	iput(inode);
 out_dir:
 	drop_nlink(dir);

+ 1 - 2
fs/nilfs2/the_nilfs.c

@@ -808,8 +808,7 @@ void nilfs_put_root(struct nilfs_root *root)
 		spin_lock(&nilfs->ns_cptree_lock);
 		rb_erase(&root->rb_node, &nilfs->ns_cptree);
 		spin_unlock(&nilfs->ns_cptree_lock);
-		if (root->ifile)
-			iput(root->ifile);
+		iput(root->ifile);
 
 		kfree(root);
 	}

+ 1 - 1
fs/ocfs2/aops.c

@@ -1251,7 +1251,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
 	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
 					  NULL);
 	if (ret < 0) {
-		ocfs2_error(inode->i_sb, "Corrupting extend for inode %llu, "
+		mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
 			    "at logical block %llu",
 			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
 			    (unsigned long long)v_blkno);

+ 2 - 2
fs/ocfs2/cluster/heartbeat.c

@@ -1127,10 +1127,10 @@ static int o2hb_thread(void *data)
 		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
 
 		mlog(ML_HEARTBEAT,
-		     "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
+		     "start = %lu.%lu, end = %lu.%lu, msec = %u, ret = %d\n",
 		     before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
 		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
-		     elapsed_msec);
+		     elapsed_msec, ret);
 
 		if (!kthread_should_stop() &&
 		    elapsed_msec < reg->hr_timeout_ms) {

+ 1 - 1
fs/ocfs2/cluster/tcp.c

@@ -1736,7 +1736,7 @@ static void o2net_connect_expired(struct work_struct *work)
 		     o2net_idle_timeout() / 1000,
 		     o2net_idle_timeout() % 1000);
 
-		o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
+		o2net_set_nn_state(nn, NULL, 0, 0);
 	}
 	spin_unlock(&nn->nn_lock);
 }

+ 1 - 1
fs/ocfs2/dir.c

@@ -744,7 +744,7 @@ restart:
 		if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
 			/* read error, skip block & hope for the best.
 			 * ocfs2_read_dir_block() has released the bh. */
-			ocfs2_error(dir->i_sb, "reading directory %llu, "
+			mlog(ML_ERROR, "reading directory %llu, "
 				    "offset %lu\n",
 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
 				    block);

+ 1 - 1
fs/ocfs2/dlm/dlmdomain.c

@@ -877,7 +877,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
 	 * to be put in someone's domain map.
 	 * Also, explicitly disallow joining at certain troublesome
 	 * times (ie. during recovery). */
-	if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
+	if (dlm->dlm_state != DLM_CTXT_LEAVING) {
 		int bit = query->node_idx;
 		spin_lock(&dlm->spinlock);
 

+ 12 - 0
fs/ocfs2/dlm/dlmmaster.c

@@ -1460,6 +1460,18 @@ way_up_top:
 
 		/* take care of the easy cases up front */
 		spin_lock(&res->spinlock);
+
+		/*
+		 * Right after dlm spinlock was released, dlm_thread could have
+		 * purged the lockres. Check if lockres got unhashed. If so
+		 * start over.
+		 */
+		if (hlist_unhashed(&res->hash_node)) {
+			spin_unlock(&res->spinlock);
+			dlm_lockres_put(res);
+			goto way_up_top;
+		}
+
 		if (res->state & (DLM_LOCK_RES_RECOVERING|
 				  DLM_LOCK_RES_MIGRATING)) {
 			spin_unlock(&res->spinlock);

+ 13 - 5
fs/ocfs2/dlm/dlmrecovery.c

@@ -1656,14 +1656,18 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
 	req.namelen = res->lockname.len;
 	memcpy(req.name, res->lockname.name, res->lockname.len);
 
+resend:
 	ret = o2net_send_message(DLM_MASTER_REQUERY_MSG, dlm->key,
 				 &req, sizeof(req), nodenum, &status);
-	/* XXX: negative status not handled properly here. */
 	if (ret < 0)
 		mlog(ML_ERROR, "Error %d when sending message %u (key "
 		     "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
 		     dlm->key, nodenum);
-	else {
+	else if (status == -ENOMEM) {
+		mlog_errno(status);
+		msleep(50);
+		goto resend;
+	} else {
 		BUG_ON(status < 0);
 		BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
 		*real_master = (u8) (status & 0xff);
@@ -1705,9 +1709,13 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
 			int ret = dlm_dispatch_assert_master(dlm, res,
 							     0, 0, flags);
 			if (ret < 0) {
-				mlog_errno(-ENOMEM);
-				/* retry!? */
-				BUG();
+				mlog_errno(ret);
+				spin_unlock(&res->spinlock);
+				dlm_lockres_put(res);
+				spin_unlock(&dlm->spinlock);
+				dlm_put(dlm);
+				/* sender will take care of this and retry */
+				return ret;
 			} else
 				__dlm_lockres_grab_inflight_worker(dlm, res);
 			spin_unlock(&res->spinlock);

+ 31 - 6
fs/ocfs2/dlmglue.c

@@ -861,8 +861,13 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
 	 * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
 	 * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
 	 * downconverting the lock before the upconvert has fully completed.
+	 * Do not prevent the dc thread from downconverting if NONBLOCK lock
+	 * had already returned.
 	 */
-	lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	if (!(lockres->l_flags & OCFS2_LOCK_NONBLOCK_FINISHED))
+		lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+	else
+		lockres_clear_flags(lockres, OCFS2_LOCK_NONBLOCK_FINISHED);
 
 	lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
 }
@@ -1324,13 +1329,12 @@ static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
 
 /* returns 0 if the mw that was removed was already satisfied, -EBUSY
  * if the mask still hadn't reached its goal */
-static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+static int __lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 				      struct ocfs2_mask_waiter *mw)
 {
-	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(&lockres->l_lock, flags);
+	assert_spin_locked(&lockres->l_lock);
 	if (!list_empty(&mw->mw_item)) {
 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
 			ret = -EBUSY;
@@ -1338,6 +1342,18 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
 		list_del_init(&mw->mw_item);
 		init_completion(&mw->mw_complete);
 	}
+
+	return ret;
+}
+
+static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
+				      struct ocfs2_mask_waiter *mw)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&lockres->l_lock, flags);
+	ret = __lockres_remove_mask_waiter(lockres, mw);
 	spin_unlock_irqrestore(&lockres->l_lock, flags);
 
 	return ret;
@@ -1373,6 +1389,7 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
 	unsigned long flags;
 	unsigned int gen;
 	int noqueue_attempted = 0;
+	int dlm_locked = 0;
 
 	ocfs2_init_mask_waiter(&mw);
 
@@ -1481,6 +1498,7 @@ again:
 			ocfs2_recover_from_dlm_error(lockres, 1);
 			goto out;
 		}
+		dlm_locked = 1;
 
 		mlog(0, "lock %s, successful return from ocfs2_dlm_lock\n",
 		     lockres->l_name);
@@ -1514,10 +1532,17 @@ out:
 	if (wait && arg_flags & OCFS2_LOCK_NONBLOCK &&
 	    mw.mw_mask & (OCFS2_LOCK_BUSY|OCFS2_LOCK_BLOCKED)) {
 		wait = 0;
-		if (lockres_remove_mask_waiter(lockres, &mw))
+		spin_lock_irqsave(&lockres->l_lock, flags);
+		if (__lockres_remove_mask_waiter(lockres, &mw)) {
+			if (dlm_locked)
+				lockres_or_flags(lockres,
+					OCFS2_LOCK_NONBLOCK_FINISHED);
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			ret = -EAGAIN;
-		else
+		} else {
+			spin_unlock_irqrestore(&lockres->l_lock, flags);
 			goto again;
+		}
 	}
 	if (wait) {
 		ret = ocfs2_wait_for_mask(&mw);

+ 1 - 3
fs/ocfs2/file.c

@@ -2381,9 +2381,7 @@ out_dio:
 		if (ret < 0)
 			written = ret;
 
-		if (!ret && ((old_size != i_size_read(inode)) ||
-			     (old_clusters != OCFS2_I(inode)->ip_clusters) ||
-			     has_refcount)) {
+		if (!ret) {
 			ret = jbd2_journal_force_commit(osb->journal->j_journal);
 			if (ret < 0)
 				written = ret;

+ 1 - 2
fs/ocfs2/inode.c

@@ -540,8 +540,7 @@ bail:
 	if (status < 0)
 		make_bad_inode(inode);
 
-	if (args && bh)
-		brelse(bh);
+	brelse(bh);
 
 	return status;
 }

+ 0 - 3
fs/ocfs2/move_extents.c

@@ -904,9 +904,6 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
 	struct buffer_head *di_bh = NULL;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-	if (!inode)
-		return -ENOENT;
-
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
 

+ 6 - 0
fs/ocfs2/ocfs2.h

@@ -144,6 +144,12 @@ enum ocfs2_unlock_action {
 						     * before the upconvert
 						     * has completed */
 
+#define OCFS2_LOCK_NONBLOCK_FINISHED (0x00001000) /* NONBLOCK cluster
+						   * lock has already
+						   * returned, do not block
+						   * dc thread from
+						   * downconverting */
+
 struct ocfs2_lock_res_ops;
 
 typedef void (*ocfs2_lock_callback)(int status, unsigned long data);

+ 1 - 1
fs/ocfs2/slot_map.c

@@ -306,7 +306,7 @@ int ocfs2_slot_to_node_num_locked(struct ocfs2_super *osb, int slot_num,
 	assert_spin_locked(&osb->osb_lock);
 
 	BUG_ON(slot_num < 0);
-	BUG_ON(slot_num > osb->max_slots);
+	BUG_ON(slot_num >= osb->max_slots);
 
 	if (!si->si_slots[slot_num].sl_valid)
 		return -ENOENT;

+ 2 - 1
fs/ocfs2/super.c

@@ -1629,8 +1629,9 @@ static int __init ocfs2_init(void)
 
 	ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
 	if (!ocfs2_debugfs_root) {
-		status = -EFAULT;
+		status = -ENOMEM;
 		mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
+		goto out4;
 	}
 
 	ocfs2_set_locking_protocol();

+ 1 - 1
fs/ocfs2/xattr.c

@@ -1284,7 +1284,7 @@ int ocfs2_xattr_get_nolock(struct inode *inode,
 		return -EOPNOTSUPP;
 
 	if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
-		ret = -ENODATA;
+		return -ENODATA;
 
 	xis.inode_bh = xbs.inode_bh = di_bh;
 	di = (struct ocfs2_dinode *)di_bh->b_data;

+ 22 - 25
fs/proc/array.c

@@ -157,20 +157,29 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 	struct user_namespace *user_ns = seq_user_ns(m);
 	struct group_info *group_info;
 	int g;
-	struct fdtable *fdt = NULL;
+	struct task_struct *tracer;
 	const struct cred *cred;
-	pid_t ppid, tpid;
+	pid_t ppid, tpid = 0, tgid, ngid;
+	unsigned int max_fds = 0;
 
 	rcu_read_lock();
 	ppid = pid_alive(p) ?
 		task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
-	tpid = 0;
-	if (pid_alive(p)) {
-		struct task_struct *tracer = ptrace_parent(p);
-		if (tracer)
-			tpid = task_pid_nr_ns(tracer, ns);
-	}
+
+	tracer = ptrace_parent(p);
+	if (tracer)
+		tpid = task_pid_nr_ns(tracer, ns);
+
+	tgid = task_tgid_nr_ns(p, ns);
+	ngid = task_numa_group_id(p);
 	cred = get_task_cred(p);
+
+	task_lock(p);
+	if (p->files)
+		max_fds = files_fdtable(p->files)->max_fds;
+	task_unlock(p);
+	rcu_read_unlock();
+
 	seq_printf(m,
 		"State:\t%s\n"
 		"Tgid:\t%d\n"
@@ -179,12 +188,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		"PPid:\t%d\n"
 		"TracerPid:\t%d\n"
 		"Uid:\t%d\t%d\t%d\t%d\n"
-		"Gid:\t%d\t%d\t%d\t%d\n",
+		"Gid:\t%d\t%d\t%d\t%d\n"
+		"FDSize:\t%d\nGroups:\t",
 		get_task_state(p),
-		task_tgid_nr_ns(p, ns),
-		task_numa_group_id(p),
-		pid_nr_ns(pid, ns),
-		ppid, tpid,
+		tgid, ngid, pid_nr_ns(pid, ns), ppid, tpid,
 		from_kuid_munged(user_ns, cred->uid),
 		from_kuid_munged(user_ns, cred->euid),
 		from_kuid_munged(user_ns, cred->suid),
@@ -192,20 +199,10 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
 		from_kgid_munged(user_ns, cred->gid),
 		from_kgid_munged(user_ns, cred->egid),
 		from_kgid_munged(user_ns, cred->sgid),
-		from_kgid_munged(user_ns, cred->fsgid));
-
-	task_lock(p);
-	if (p->files)
-		fdt = files_fdtable(p->files);
-	seq_printf(m,
-		"FDSize:\t%d\n"
-		"Groups:\t",
-		fdt ? fdt->max_fds : 0);
-	rcu_read_unlock();
+		from_kgid_munged(user_ns, cred->fsgid),
+		max_fds);
 
 	group_info = cred->group_info;
-	task_unlock(p);
-
 	for (g = 0; g < group_info->ngroups; g++)
 		seq_printf(m, "%d ",
 			   from_kgid_munged(user_ns, GROUP_AT(group_info, g)));

+ 3 - 0
fs/proc/base.c

@@ -2618,6 +2618,9 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid)
 		dput(dentry);
 	}
 
+	if (pid == tgid)
+		return;
+
 	name.name = buf;
 	name.len = snprintf(buf, sizeof(buf), "%d", tgid);
 	leader = d_hash_and_lookup(mnt->mnt_root, &name);

+ 104 - 59
fs/proc/generic.c

@@ -31,9 +31,73 @@ static DEFINE_SPINLOCK(proc_subdir_lock);
 
 static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
 {
-	if (de->namelen != len)
-		return 0;
-	return !memcmp(name, de->name, len);
+	if (len < de->namelen)
+		return -1;
+	if (len > de->namelen)
+		return 1;
+
+	return memcmp(name, de->name, len);
+}
+
+static struct proc_dir_entry *pde_subdir_first(struct proc_dir_entry *dir)
+{
+	return rb_entry_safe(rb_first(&dir->subdir), struct proc_dir_entry,
+			     subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_next(struct proc_dir_entry *dir)
+{
+	return rb_entry_safe(rb_next(&dir->subdir_node), struct proc_dir_entry,
+			     subdir_node);
+}
+
+static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir,
+					      const char *name,
+					      unsigned int len)
+{
+	struct rb_node *node = dir->subdir.rb_node;
+
+	while (node) {
+		struct proc_dir_entry *de = container_of(node,
+							 struct proc_dir_entry,
+							 subdir_node);
+		int result = proc_match(len, name, de);
+
+		if (result < 0)
+			node = node->rb_left;
+		else if (result > 0)
+			node = node->rb_right;
+		else
+			return de;
+	}
+	return NULL;
+}
+
+static bool pde_subdir_insert(struct proc_dir_entry *dir,
+			      struct proc_dir_entry *de)
+{
+	struct rb_root *root = &dir->subdir;
+	struct rb_node **new = &root->rb_node, *parent = NULL;
+
+	/* Figure out where to put new node */
+	while (*new) {
+		struct proc_dir_entry *this =
+			container_of(*new, struct proc_dir_entry, subdir_node);
+		int result = proc_match(de->namelen, de->name, this);
+
+		parent = *new;
+		if (result < 0)
+			new = &(*new)->rb_left;
+		else if (result > 0)
+			new = &(*new)->rb_right;
+		else
+			return false;
+	}
+
+	/* Add new node and rebalance tree. */
+	rb_link_node(&de->subdir_node, parent, new);
+	rb_insert_color(&de->subdir_node, root);
+	return true;
 }
 
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -92,10 +156,7 @@ static int __xlate_proc_name(const char *name, struct proc_dir_entry **ret,
 			break;
 
 		len = next - cp;
-		for (de = de->subdir; de ; de = de->next) {
-			if (proc_match(len, cp, de))
-				break;
-		}
+		de = pde_subdir_find(de, cp, len);
 		if (!de) {
 			WARN(1, "name '%s'\n", name);
 			return -ENOENT;
@@ -183,19 +244,16 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
 	struct inode *inode;
 
 	spin_lock(&proc_subdir_lock);
-	for (de = de->subdir; de ; de = de->next) {
-		if (de->namelen != dentry->d_name.len)
-			continue;
-		if (!memcmp(dentry->d_name.name, de->name, de->namelen)) {
-			pde_get(de);
-			spin_unlock(&proc_subdir_lock);
-			inode = proc_get_inode(dir->i_sb, de);
-			if (!inode)
-				return ERR_PTR(-ENOMEM);
-			d_set_d_op(dentry, &simple_dentry_operations);
-			d_add(dentry, inode);
-			return NULL;
-		}
+	de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
+	if (de) {
+		pde_get(de);
+		spin_unlock(&proc_subdir_lock);
+		inode = proc_get_inode(dir->i_sb, de);
+		if (!inode)
+			return ERR_PTR(-ENOMEM);
+		d_set_d_op(dentry, &simple_dentry_operations);
+		d_add(dentry, inode);
+		return NULL;
 	}
 	spin_unlock(&proc_subdir_lock);
 	return ERR_PTR(-ENOENT);
@@ -225,7 +283,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		return 0;
 
 	spin_lock(&proc_subdir_lock);
-	de = de->subdir;
+	de = pde_subdir_first(de);
 	i = ctx->pos - 2;
 	for (;;) {
 		if (!de) {
@@ -234,7 +292,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		if (!i)
 			break;
-		de = de->next;
+		de = pde_subdir_next(de);
 		i--;
 	}
 
@@ -249,7 +307,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
 		}
 		spin_lock(&proc_subdir_lock);
 		ctx->pos++;
-		next = de->next;
+		next = pde_subdir_next(de);
 		pde_put(de);
 		de = next;
 	} while (de);
@@ -286,9 +344,8 @@ static const struct inode_operations proc_dir_inode_operations = {
 
 static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
 {
-	struct proc_dir_entry *tmp;
 	int ret;
-	
+
 	ret = proc_alloc_inum(&dp->low_ino);
 	if (ret)
 		return ret;
@@ -304,21 +361,21 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
 		dp->proc_iops = &proc_file_inode_operations;
 	} else {
 		WARN_ON(1);
+		proc_free_inum(dp->low_ino);
 		return -EINVAL;
 	}
 
 	spin_lock(&proc_subdir_lock);
-
-	for (tmp = dir->subdir; tmp; tmp = tmp->next)
-		if (strcmp(tmp->name, dp->name) == 0) {
-			WARN(1, "proc_dir_entry '%s/%s' already registered\n",
-				dir->name, dp->name);
-			break;
-		}
-
-	dp->next = dir->subdir;
 	dp->parent = dir;
-	dir->subdir = dp;
+	if (pde_subdir_insert(dir, dp) == false) {
+		WARN(1, "proc_dir_entry '%s/%s' already registered\n",
+		     dir->name, dp->name);
+		spin_unlock(&proc_subdir_lock);
+		if (S_ISDIR(dp->mode))
+			dir->nlink--;
+		proc_free_inum(dp->low_ino);
+		return -EEXIST;
+	}
 	spin_unlock(&proc_subdir_lock);
 
 	return 0;
@@ -354,6 +411,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
 	ent->namelen = qstr.len;
 	ent->mode = mode;
 	ent->nlink = nlink;
+	ent->subdir = RB_ROOT;
 	atomic_set(&ent->count, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
@@ -485,7 +543,6 @@ void pde_put(struct proc_dir_entry *pde)
  */
 void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *de = NULL;
 	const char *fn = name;
 	unsigned int len;
@@ -497,14 +554,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			de = *p;
-			*p = de->next;
-			de->next = NULL;
-			break;
-		}
-	}
+	de = pde_subdir_find(parent, fn, len);
+	if (de)
+		rb_erase(&de->subdir_node, &parent->subdir);
 	spin_unlock(&proc_subdir_lock);
 	if (!de) {
 		WARN(1, "name '%s'\n", name);
@@ -516,16 +568,15 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
 	if (S_ISDIR(de->mode))
 		parent->nlink--;
 	de->nlink = 0;
-	WARN(de->subdir, "%s: removing non-empty directory "
-			 "'%s/%s', leaking at least '%s'\n", __func__,
-			 de->parent->name, de->name, de->subdir->name);
+	WARN(pde_subdir_first(de),
+	     "%s: removing non-empty directory '%s/%s', leaking at least '%s'\n",
+	     __func__, de->parent->name, de->name, pde_subdir_first(de)->name);
 	pde_put(de);
 }
 EXPORT_SYMBOL(remove_proc_entry);
 
 int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 {
-	struct proc_dir_entry **p;
 	struct proc_dir_entry *root = NULL, *de, *next;
 	const char *fn = name;
 	unsigned int len;
@@ -537,24 +588,18 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
 	}
 	len = strlen(fn);
 
-	for (p = &parent->subdir; *p; p=&(*p)->next ) {
-		if (proc_match(len, fn, *p)) {
-			root = *p;
-			*p = root->next;
-			root->next = NULL;
-			break;
-		}
-	}
+	root = pde_subdir_find(parent, fn, len);
 	if (!root) {
 		spin_unlock(&proc_subdir_lock);
 		return -ENOENT;
 	}
+	rb_erase(&root->subdir_node, &parent->subdir);
+
 	de = root;
 	while (1) {
-		next = de->subdir;
+		next = pde_subdir_first(de);
 		if (next) {
-			de->subdir = next->next;
-			next->next = NULL;
+			rb_erase(&next->subdir_node, &de->subdir);
 			de = next;
 			continue;
 		}

+ 6 - 5
fs/proc/internal.h

@@ -24,10 +24,9 @@ struct mempolicy;
  * tree) of these proc_dir_entries, so that we can dynamically
  * add new files to /proc.
  *
- * The "next" pointer creates a linked list of one /proc directory,
- * while parent/subdir create the directory structure (every
- * /proc file has a parent, but "subdir" is NULL for all
- * non-directory entries).
+ * parent/subdir are used for the directory structure (every /proc file has a
+ * parent, but "subdir" is empty for all non-directory entries).
+ * subdir_node is used to build the rb tree "subdir" of the parent.
  */
 struct proc_dir_entry {
 	unsigned int low_ino;
@@ -38,7 +37,9 @@ struct proc_dir_entry {
 	loff_t size;
 	const struct inode_operations *proc_iops;
 	const struct file_operations *proc_fops;
-	struct proc_dir_entry *next, *parent, *subdir;
+	struct proc_dir_entry *parent;
+	struct rb_root subdir;
+	struct rb_node subdir_node;
 	void *data;
 	atomic_t count;		/* use count */
 	atomic_t in_use;	/* number of callers into module in progress; */

+ 1 - 0
fs/proc/proc_net.c

@@ -192,6 +192,7 @@ static __net_init int proc_net_ns_init(struct net *net)
 	if (!netd)
 		goto out;
 
+	netd->subdir = RB_ROOT;
 	netd->data = net;
 	netd->nlink = 2;
 	netd->namelen = 3;

+ 1 - 0
fs/proc/root.c

@@ -251,6 +251,7 @@ struct proc_dir_entry proc_root = {
 	.proc_iops	= &proc_root_inode_operations, 
 	.proc_fops	= &proc_root_operations,
 	.parent		= &proc_root,
+	.subdir		= RB_ROOT,
 	.name		= "/proc",
 };
 

+ 68 - 36
fs/proc/task_mmu.c

@@ -447,58 +447,91 @@ struct mem_size_stats {
 	u64 pss;
 };
 
+static void smaps_account(struct mem_size_stats *mss, struct page *page,
+		unsigned long size, bool young, bool dirty)
+{
+	int mapcount;
+
+	if (PageAnon(page))
+		mss->anonymous += size;
 
-static void smaps_pte_entry(pte_t ptent, unsigned long addr,
-		unsigned long ptent_size, struct mm_walk *walk)
+	mss->resident += size;
+	/* Accumulate the size in pages that have been accessed. */
+	if (young || PageReferenced(page))
+		mss->referenced += size;
+	mapcount = page_mapcount(page);
+	if (mapcount >= 2) {
+		u64 pss_delta;
+
+		if (dirty || PageDirty(page))
+			mss->shared_dirty += size;
+		else
+			mss->shared_clean += size;
+		pss_delta = (u64)size << PSS_SHIFT;
+		do_div(pss_delta, mapcount);
+		mss->pss += pss_delta;
+	} else {
+		if (dirty || PageDirty(page))
+			mss->private_dirty += size;
+		else
+			mss->private_clean += size;
+		mss->pss += (u64)size << PSS_SHIFT;
+	}
+}
+
+static void smaps_pte_entry(pte_t *pte, unsigned long addr,
+		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
 	struct vm_area_struct *vma = mss->vma;
 	pgoff_t pgoff = linear_page_index(vma, addr);
 	struct page *page = NULL;
-	int mapcount;
 
-	if (pte_present(ptent)) {
-		page = vm_normal_page(vma, addr, ptent);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t swpent = pte_to_swp_entry(ptent);
+	if (pte_present(*pte)) {
+		page = vm_normal_page(vma, addr, *pte);
+	} else if (is_swap_pte(*pte)) {
+		swp_entry_t swpent = pte_to_swp_entry(*pte);
 
 		if (!non_swap_entry(swpent))
-			mss->swap += ptent_size;
+			mss->swap += PAGE_SIZE;
 		else if (is_migration_entry(swpent))
 			page = migration_entry_to_page(swpent);
-	} else if (pte_file(ptent)) {
-		if (pte_to_pgoff(ptent) != pgoff)
-			mss->nonlinear += ptent_size;
+	} else if (pte_file(*pte)) {
+		if (pte_to_pgoff(*pte) != pgoff)
+			mss->nonlinear += PAGE_SIZE;
 	}
 
 	if (!page)
 		return;
 
-	if (PageAnon(page))
-		mss->anonymous += ptent_size;
-
 	if (page->index != pgoff)
-		mss->nonlinear += ptent_size;
+		mss->nonlinear += PAGE_SIZE;
 
-	mss->resident += ptent_size;
-	/* Accumulate the size in pages that have been accessed. */
-	if (pte_young(ptent) || PageReferenced(page))
-		mss->referenced += ptent_size;
-	mapcount = page_mapcount(page);
-	if (mapcount >= 2) {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->shared_dirty += ptent_size;
-		else
-			mss->shared_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT) / mapcount;
-	} else {
-		if (pte_dirty(ptent) || PageDirty(page))
-			mss->private_dirty += ptent_size;
-		else
-			mss->private_clean += ptent_size;
-		mss->pss += (ptent_size << PSS_SHIFT);
-	}
+	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+	struct mem_size_stats *mss = walk->private;
+	struct vm_area_struct *vma = mss->vma;
+	struct page *page;
+
+	/* FOLL_DUMP will return -EFAULT on huge zero page */
+	page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+	if (IS_ERR_OR_NULL(page))
+		return;
+	mss->anonymous_thp += HPAGE_PMD_SIZE;
+	smaps_account(mss, page, HPAGE_PMD_SIZE,
+			pmd_young(*pmd), pmd_dirty(*pmd));
 }
+#else
+static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
+		struct mm_walk *walk)
+{
+}
+#endif
 
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			   struct mm_walk *walk)
@@ -509,9 +542,8 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 
 	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
-		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+		smaps_pmd_entry(pmd, addr, walk);
 		spin_unlock(ptl);
-		mss->anonymous_thp += HPAGE_PMD_SIZE;
 		return 0;
 	}
 
@@ -524,7 +556,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE)
-		smaps_pte_entry(*pte, addr, PAGE_SIZE, walk);
+		smaps_pte_entry(pte, addr, walk);
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
 	return 0;

+ 26 - 0
include/linux/cgroup.h

@@ -112,6 +112,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
 		percpu_ref_get(&css->refcnt);
 }
 
+/**
+ * css_get_many - obtain references on the specified css
+ * @css: target css
+ * @n: number of references to get
+ *
+ * The caller must already have a reference.
+ */
+static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_get_many(&css->refcnt, n);
+}
+
 /**
  * css_tryget - try to obtain a reference on the specified css
  * @css: target css
@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
 		percpu_ref_put(&css->refcnt);
 }
 
+/**
+ * css_put_many - put css references
+ * @css: target css
+ * @n: number of references to put
+ *
+ * Put references obtained via css_get() and css_tryget_online().
+ */
+static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
+{
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_put_many(&css->refcnt, n);
+}
+
 /* bits in struct cgroup flags field */
 enum {
 	/* Control Group requires release notifications to userspace */

+ 6 - 4
include/linux/compaction.h

@@ -33,10 +33,11 @@ extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
 			enum migrate_mode mode, int *contended,
-			struct zone **candidate_zone);
+			int alloc_flags, int classzone_idx);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
-extern unsigned long compaction_suitable(struct zone *zone, int order);
+extern unsigned long compaction_suitable(struct zone *zone, int order,
+					int alloc_flags, int classzone_idx);
 
 /* Do not skip compaction more than 64 times */
 #define COMPACT_MAX_DEFER_SHIFT 6
@@ -103,7 +104,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
 			enum migrate_mode mode, int *contended,
-			struct zone **candidate_zone)
+			int alloc_flags, int classzone_idx)
 {
 	return COMPACT_CONTINUE;
 }
@@ -116,7 +117,8 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
 {
 }
 
-static inline unsigned long compaction_suitable(struct zone *zone, int order)
+static inline unsigned long compaction_suitable(struct zone *zone, int order,
+					int alloc_flags, int classzone_idx)
 {
 	return COMPACT_SKIPPED;
 }

+ 0 - 1
include/linux/file.h

@@ -66,7 +66,6 @@ extern void set_close_on_exec(unsigned int fd, int flag);
 extern bool get_close_on_exec(unsigned int fd);
 extern void put_filp(struct file *);
 extern int get_unused_fd_flags(unsigned flags);
-#define get_unused_fd() get_unused_fd_flags(0)
 extern void put_unused_fd(unsigned int fd);
 
 extern void fd_install(unsigned int fd, struct file *file);

+ 2 - 2
include/linux/gfp.h

@@ -381,8 +381,8 @@ extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
 void page_alloc_init(void);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
-void drain_all_pages(void);
-void drain_local_pages(void *dummy);
+void drain_all_pages(struct zone *zone);
+void drain_local_pages(struct zone *zone);
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what

+ 2 - 1
include/linux/hugetlb.h

@@ -311,7 +311,8 @@ static inline struct hstate *hstate_sizelog(int page_size_log)
 {
 	if (!page_size_log)
 		return &default_hstate;
-	return size_to_hstate(1 << page_size_log);
+
+	return size_to_hstate(1UL << page_size_log);
 }
 
 static inline struct hstate *hstate_vma(struct vm_area_struct *vma)

+ 0 - 1
include/linux/hugetlb_cgroup.h

@@ -16,7 +16,6 @@
 #define _LINUX_HUGETLB_CGROUP_H
 
 #include <linux/mmdebug.h>
-#include <linux/res_counter.h>
 
 struct hugetlb_cgroup;
 /*

+ 13 - 0
include/linux/kern_levels.h

@@ -22,4 +22,17 @@
  */
 #define KERN_CONT	""
 
+/* integer equivalents of KERN_<LEVEL> */
+#define LOGLEVEL_SCHED		-2	/* Deferred messages from sched code
+					 * are set to this special level */
+#define LOGLEVEL_DEFAULT	-1	/* default (or last) loglevel */
+#define LOGLEVEL_EMERG		0	/* system is unusable */
+#define LOGLEVEL_ALERT		1	/* action must be taken immediately */
+#define LOGLEVEL_CRIT		2	/* critical conditions */
+#define LOGLEVEL_ERR		3	/* error conditions */
+#define LOGLEVEL_WARNING	4	/* warning conditions */
+#define LOGLEVEL_NOTICE		5	/* normal but significant condition */
+#define LOGLEVEL_INFO		6	/* informational */
+#define LOGLEVEL_DEBUG		7	/* debug-level messages */
+
 #endif

+ 1 - 0
include/linux/kernel.h

@@ -427,6 +427,7 @@ extern int panic_timeout;
 extern int panic_on_oops;
 extern int panic_on_unrecovered_nmi;
 extern int panic_on_io_nmi;
+extern int panic_on_warn;
 extern int sysctl_panic_on_stackoverflow;
 /*
  * Only to be used by arch init code. If the user over-wrote the default

+ 14 - 36
include/linux/memcontrol.h

@@ -25,7 +25,6 @@
 #include <linux/jump_label.h>
 
 struct mem_cgroup;
-struct page_cgroup;
 struct page;
 struct mm_struct;
 struct kmem_cache;
@@ -68,10 +67,9 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
 
-bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
-				  struct mem_cgroup *memcg);
-bool task_in_mem_cgroup(struct task_struct *task,
-			const struct mem_cgroup *memcg);
+bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
+			      struct mem_cgroup *root);
+bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 
 extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -79,15 +77,16 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
 extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css);
 
-static inline
-bool mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *memcg)
+static inline bool mm_match_cgroup(struct mm_struct *mm,
+				   struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *task_memcg;
-	bool match;
+	bool match = false;
 
 	rcu_read_lock();
 	task_memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-	match = __mem_cgroup_same_or_subtree(memcg, task_memcg);
+	if (task_memcg)
+		match = mem_cgroup_is_descendant(task_memcg, memcg);
 	rcu_read_unlock();
 	return match;
 }
@@ -141,8 +140,8 @@ static inline bool mem_cgroup_disabled(void)
 
 struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
 					      unsigned long *flags);
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
-			      unsigned long flags);
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
+			      unsigned long *flags);
 void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
 				 enum mem_cgroup_stat_index idx, int val);
 
@@ -174,10 +173,6 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
 void mem_cgroup_split_huge_fixup(struct page *head);
 #endif
 
-#ifdef CONFIG_DEBUG_VM
-bool mem_cgroup_bad_page_check(struct page *page);
-void mem_cgroup_print_bad_page(struct page *page);
-#endif
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
@@ -297,7 +292,7 @@ static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
 }
 
 static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
-					bool locked, unsigned long flags)
+					bool *locked, unsigned long *flags)
 {
 }
 
@@ -347,19 +342,6 @@ void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
 }
 #endif /* CONFIG_MEMCG */
 
-#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)
-static inline bool
-mem_cgroup_bad_page_check(struct page *page)
-{
-	return false;
-}
-
-static inline void
-mem_cgroup_print_bad_page(struct page *page)
-{
-}
-#endif
-
 enum {
 	UNDER_LIMIT,
 	SOFT_LIMIT,
@@ -447,9 +429,8 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 	/*
 	 * __GFP_NOFAIL allocations will move on even if charging is not
 	 * possible. Therefore we don't even try, and have this allocation
-	 * unaccounted. We could in theory charge it with
-	 * res_counter_charge_nofail, but we hope those allocations are rare,
-	 * and won't be worth the trouble.
+	 * unaccounted. We could in theory charge it forcibly, but we hope
+	 * those allocations are rare, and won't be worth the trouble.
 	 */
 	if (gfp & __GFP_NOFAIL)
 		return true;
@@ -467,8 +448,6 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
  * memcg_kmem_uncharge_pages: uncharge pages from memcg
  * @page: pointer to struct page being freed
  * @order: allocation order.
- *
- * there is no need to specify memcg here, since it is embedded in page_cgroup
  */
 static inline void
 memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -485,8 +464,7 @@ memcg_kmem_uncharge_pages(struct page *page, int order)
  *
  * Needs to be called after memcg_kmem_newpage_charge, regardless of success or
  * failure of the allocation. if @page is NULL, this function will revert the
- * charges. Otherwise, it will commit the memcg given by @memcg to the
- * corresponding page_cgroup.
+ * charges. Otherwise, it will commit @page to @memcg.
  */
 static inline void
 memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)

+ 5 - 0
include/linux/mm_types.h

@@ -22,6 +22,7 @@
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
 struct address_space;
+struct mem_cgroup;
 
 #define USE_SPLIT_PTE_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
 #define USE_SPLIT_PMD_PTLOCKS	(USE_SPLIT_PTE_PTLOCKS && \
@@ -167,6 +168,10 @@ struct page {
 		struct page *first_page;	/* Compound tail pages */
 	};
 
+#ifdef CONFIG_MEMCG
+	struct mem_cgroup *mem_cgroup;
+#endif
+
 	/*
 	 * On machines where all RAM is mapped into kernel address space,
 	 * we can simply calculate the virtual address. On machines with

+ 0 - 12
include/linux/mmzone.h

@@ -722,9 +722,6 @@ typedef struct pglist_data {
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
-#ifdef CONFIG_MEMCG
-	struct page_cgroup *node_page_cgroup;
-#endif
 #endif
 #ifndef CONFIG_NO_BOOTMEM
 	struct bootmem_data *bdata;
@@ -1078,7 +1075,6 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
 
 struct page;
-struct page_cgroup;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -1096,14 +1092,6 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
-#ifdef CONFIG_MEMCG
-	/*
-	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
-	 * section. (see memcontrol.h/page_cgroup.h about this.)
-	 */
-	struct page_cgroup *page_cgroup;
-	unsigned long pad;
-#endif
 	/*
 	 * WARNING: mem_section must be a power-of-2 in size for the
 	 * calculation and use of SECTION_ROOT_MASK to make sense.

+ 0 - 105
include/linux/page_cgroup.h

@@ -1,105 +0,0 @@
-#ifndef __LINUX_PAGE_CGROUP_H
-#define __LINUX_PAGE_CGROUP_H
-
-enum {
-	/* flags for mem_cgroup */
-	PCG_USED = 0x01,	/* This page is charged to a memcg */
-	PCG_MEM = 0x02,		/* This page holds a memory charge */
-	PCG_MEMSW = 0x04,	/* This page holds a memory+swap charge */
-};
-
-struct pglist_data;
-
-#ifdef CONFIG_MEMCG
-struct mem_cgroup;
-
-/*
- * Page Cgroup can be considered as an extended mem_map.
- * A page_cgroup page is associated with every page descriptor. The
- * page_cgroup helps us identify information about the cgroup
- * All page cgroups are allocated at boot or memory hotplug event,
- * then the page cgroup for pfn always exists.
- */
-struct page_cgroup {
-	unsigned long flags;
-	struct mem_cgroup *mem_cgroup;
-};
-
-extern void pgdat_page_cgroup_init(struct pglist_data *pgdat);
-
-#ifdef CONFIG_SPARSEMEM
-static inline void page_cgroup_init_flatmem(void)
-{
-}
-extern void page_cgroup_init(void);
-#else
-extern void page_cgroup_init_flatmem(void);
-static inline void page_cgroup_init(void)
-{
-}
-#endif
-
-struct page_cgroup *lookup_page_cgroup(struct page *page);
-
-static inline int PageCgroupUsed(struct page_cgroup *pc)
-{
-	return !!(pc->flags & PCG_USED);
-}
-#else /* !CONFIG_MEMCG */
-struct page_cgroup;
-
-static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat)
-{
-}
-
-static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
-{
-	return NULL;
-}
-
-static inline void page_cgroup_init(void)
-{
-}
-
-static inline void page_cgroup_init_flatmem(void)
-{
-}
-#endif /* CONFIG_MEMCG */
-
-#include <linux/swap.h>
-
-#ifdef CONFIG_MEMCG_SWAP
-extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
-					unsigned short old, unsigned short new);
-extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
-extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
-extern int swap_cgroup_swapon(int type, unsigned long max_pages);
-extern void swap_cgroup_swapoff(int type);
-#else
-
-static inline
-unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
-{
-	return 0;
-}
-
-static inline
-unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
-{
-	return 0;
-}
-
-static inline int
-swap_cgroup_swapon(int type, unsigned long max_pages)
-{
-	return 0;
-}
-
-static inline void swap_cgroup_swapoff(int type)
-{
-	return;
-}
-
-#endif /* CONFIG_MEMCG_SWAP */
-
-#endif /* __LINUX_PAGE_CGROUP_H */

+ 51 - 0
include/linux/page_counter.h

@@ -0,0 +1,51 @@
+#ifndef _LINUX_PAGE_COUNTER_H
+#define _LINUX_PAGE_COUNTER_H
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+#include <asm/page.h>
+
+struct page_counter {
+	atomic_long_t count;
+	unsigned long limit;
+	struct page_counter *parent;
+
+	/* legacy */
+	unsigned long watermark;
+	unsigned long failcnt;
+};
+
+#if BITS_PER_LONG == 32
+#define PAGE_COUNTER_MAX LONG_MAX
+#else
+#define PAGE_COUNTER_MAX (LONG_MAX / PAGE_SIZE)
+#endif
+
+static inline void page_counter_init(struct page_counter *counter,
+				     struct page_counter *parent)
+{
+	atomic_long_set(&counter->count, 0);
+	counter->limit = PAGE_COUNTER_MAX;
+	counter->parent = parent;
+}
+
+static inline unsigned long page_counter_read(struct page_counter *counter)
+{
+	return atomic_long_read(&counter->count);
+}
+
+void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages);
+void page_counter_charge(struct page_counter *counter, unsigned long nr_pages);
+int page_counter_try_charge(struct page_counter *counter,
+			    unsigned long nr_pages,
+			    struct page_counter **fail);
+void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
+int page_counter_limit(struct page_counter *counter, unsigned long limit);
+int page_counter_memparse(const char *buf, unsigned long *nr_pages);
+
+static inline void page_counter_reset_watermark(struct page_counter *counter)
+{
+	counter->watermark = page_counter_read(counter);
+}
+
+#endif /* _LINUX_PAGE_COUNTER_H */

+ 38 - 9
include/linux/percpu-refcount.h

@@ -147,27 +147,41 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
 }
 
 /**
- * percpu_ref_get - increment a percpu refcount
+ * percpu_ref_get_many - increment a percpu refcount
  * @ref: percpu_ref to get
+ * @nr: number of references to get
  *
- * Analagous to atomic_long_inc().
+ * Analogous to atomic_long_add().
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_get(struct percpu_ref *ref)
+static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
 {
 	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_inc(*percpu_count);
+		this_cpu_add(*percpu_count, nr);
 	else
-		atomic_long_inc(&ref->count);
+		atomic_long_add(nr, &ref->count);
 
 	rcu_read_unlock_sched();
 }
 
+/**
+ * percpu_ref_get - increment a percpu refcount
+ * @ref: percpu_ref to get
+ *
+ * Analagous to atomic_long_inc().
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+	percpu_ref_get_many(ref, 1);
+}
+
 /**
  * percpu_ref_tryget - try to increment a percpu refcount
  * @ref: percpu_ref to try-get
@@ -231,28 +245,43 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 }
 
 /**
- * percpu_ref_put - decrement a percpu refcount
+ * percpu_ref_put_many - decrement a percpu refcount
  * @ref: percpu_ref to put
+ * @nr: number of references to put
  *
  * Decrement the refcount, and if 0, call the release function (which was passed
  * to percpu_ref_init())
  *
  * This function is safe to call as long as @ref is between init and exit.
  */
-static inline void percpu_ref_put(struct percpu_ref *ref)
+static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 {
 	unsigned long __percpu *percpu_count;
 
 	rcu_read_lock_sched();
 
 	if (__ref_is_percpu(ref, &percpu_count))
-		this_cpu_dec(*percpu_count);
-	else if (unlikely(atomic_long_dec_and_test(&ref->count)))
+		this_cpu_sub(*percpu_count, nr);
+	else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
 		ref->release(ref);
 
 	rcu_read_unlock_sched();
 }
 
+/**
+ * percpu_ref_put - decrement a percpu refcount
+ * @ref: percpu_ref to put
+ *
+ * Decrement the refcount, and if 0, call the release function (which was passed
+ * to percpu_ref_init())
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline void percpu_ref_put(struct percpu_ref *ref)
+{
+	percpu_ref_put_many(ref, 1);
+}
+
 /**
  * percpu_ref_is_zero - test whether a percpu refcount reached zero
  * @ref: percpu_ref to test

+ 0 - 1
include/linux/printk.h

@@ -118,7 +118,6 @@ int no_printk(const char *fmt, ...)
 #ifdef CONFIG_EARLY_PRINTK
 extern asmlinkage __printf(1, 2)
 void early_printk(const char *fmt, ...);
-void early_vprintk(const char *fmt, va_list ap);
 #else
 static inline __printf(1, 2) __cold
 void early_printk(const char *s, ...) { }

+ 1 - 1
include/linux/ptrace.h

@@ -52,7 +52,7 @@ extern void ptrace_notify(int exit_code);
 extern void __ptrace_link(struct task_struct *child,
 			  struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
-extern void exit_ptrace(struct task_struct *tracer);
+extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
 #define PTRACE_MODE_ATTACH	0x02
 #define PTRACE_MODE_NOAUDIT	0x04

+ 0 - 223
include/linux/res_counter.h

@@ -1,223 +0,0 @@
-#ifndef __RES_COUNTER_H__
-#define __RES_COUNTER_H__
-
-/*
- * Resource Counters
- * Contain common data types and routines for resource accounting
- *
- * Copyright 2007 OpenVZ SWsoft Inc
- *
- * Author: Pavel Emelianov <xemul@openvz.org>
- *
- * See Documentation/cgroups/resource_counter.txt for more
- * info about what this counter is.
- */
-
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-
-/*
- * The core object. the cgroup that wishes to account for some
- * resource may include this counter into its structures and use
- * the helpers described beyond
- */
-
-struct res_counter {
-	/*
-	 * the current resource consumption level
-	 */
-	unsigned long long usage;
-	/*
-	 * the maximal value of the usage from the counter creation
-	 */
-	unsigned long long max_usage;
-	/*
-	 * the limit that usage cannot exceed
-	 */
-	unsigned long long limit;
-	/*
-	 * the limit that usage can be exceed
-	 */
-	unsigned long long soft_limit;
-	/*
-	 * the number of unsuccessful attempts to consume the resource
-	 */
-	unsigned long long failcnt;
-	/*
-	 * the lock to protect all of the above.
-	 * the routines below consider this to be IRQ-safe
-	 */
-	spinlock_t lock;
-	/*
-	 * Parent counter, used for hierarchial resource accounting
-	 */
-	struct res_counter *parent;
-};
-
-#define RES_COUNTER_MAX ULLONG_MAX
-
-/**
- * Helpers to interact with userspace
- * res_counter_read_u64() - returns the value of the specified member.
- * res_counter_read/_write - put/get the specified fields from the
- * res_counter struct to/from the user
- *
- * @counter:     the counter in question
- * @member:  the field to work with (see RES_xxx below)
- * @buf:     the buffer to opeate on,...
- * @nbytes:  its size...
- * @pos:     and the offset.
- */
-
-u64 res_counter_read_u64(struct res_counter *counter, int member);
-
-ssize_t res_counter_read(struct res_counter *counter, int member,
-		const char __user *buf, size_t nbytes, loff_t *pos,
-		int (*read_strategy)(unsigned long long val, char *s));
-
-int res_counter_memparse_write_strategy(const char *buf,
-					unsigned long long *res);
-
-/*
- * the field descriptors. one for each member of res_counter
- */
-
-enum {
-	RES_USAGE,
-	RES_MAX_USAGE,
-	RES_LIMIT,
-	RES_FAILCNT,
-	RES_SOFT_LIMIT,
-};
-
-/*
- * helpers for accounting
- */
-
-void res_counter_init(struct res_counter *counter, struct res_counter *parent);
-
-/*
- * charge - try to consume more resource.
- *
- * @counter: the counter
- * @val: the amount of the resource. each controller defines its own
- *       units, e.g. numbers, bytes, Kbytes, etc
- *
- * returns 0 on success and <0 if the counter->usage will exceed the
- * counter->limit
- *
- * charge_nofail works the same, except that it charges the resource
- * counter unconditionally, and returns < 0 if the after the current
- * charge we are over limit.
- */
-
-int __must_check res_counter_charge(struct res_counter *counter,
-		unsigned long val, struct res_counter **limit_fail_at);
-int res_counter_charge_nofail(struct res_counter *counter,
-		unsigned long val, struct res_counter **limit_fail_at);
-
-/*
- * uncharge - tell that some portion of the resource is released
- *
- * @counter: the counter
- * @val: the amount of the resource
- *
- * these calls check for usage underflow and show a warning on the console
- *
- * returns the total charges still present in @counter.
- */
-
-u64 res_counter_uncharge(struct res_counter *counter, unsigned long val);
-
-u64 res_counter_uncharge_until(struct res_counter *counter,
-			       struct res_counter *top,
-			       unsigned long val);
-/**
- * res_counter_margin - calculate chargeable space of a counter
- * @cnt: the counter
- *
- * Returns the difference between the hard limit and the current usage
- * of resource counter @cnt.
- */
-static inline unsigned long long res_counter_margin(struct res_counter *cnt)
-{
-	unsigned long long margin;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	if (cnt->limit > cnt->usage)
-		margin = cnt->limit - cnt->usage;
-	else
-		margin = 0;
-	spin_unlock_irqrestore(&cnt->lock, flags);
-	return margin;
-}
-
-/**
- * Get the difference between the usage and the soft limit
- * @cnt: The counter
- *
- * Returns 0 if usage is less than or equal to soft limit
- * The difference between usage and soft limit, otherwise.
- */
-static inline unsigned long long
-res_counter_soft_limit_excess(struct res_counter *cnt)
-{
-	unsigned long long excess;
-	unsigned long flags;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	if (cnt->usage <= cnt->soft_limit)
-		excess = 0;
-	else
-		excess = cnt->usage - cnt->soft_limit;
-	spin_unlock_irqrestore(&cnt->lock, flags);
-	return excess;
-}
-
-static inline void res_counter_reset_max(struct res_counter *cnt)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	cnt->max_usage = cnt->usage;
-	spin_unlock_irqrestore(&cnt->lock, flags);
-}
-
-static inline void res_counter_reset_failcnt(struct res_counter *cnt)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	cnt->failcnt = 0;
-	spin_unlock_irqrestore(&cnt->lock, flags);
-}
-
-static inline int res_counter_set_limit(struct res_counter *cnt,
-		unsigned long long limit)
-{
-	unsigned long flags;
-	int ret = -EBUSY;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	if (cnt->usage <= limit) {
-		cnt->limit = limit;
-		ret = 0;
-	}
-	spin_unlock_irqrestore(&cnt->lock, flags);
-	return ret;
-}
-
-static inline int
-res_counter_set_soft_limit(struct res_counter *cnt,
-				unsigned long long soft_limit)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&cnt->lock, flags);
-	cnt->soft_limit = soft_limit;
-	spin_unlock_irqrestore(&cnt->lock, flags);
-	return 0;
-}
-
-#endif

+ 0 - 4
include/linux/slab.h

@@ -513,10 +513,6 @@ struct memcg_cache_params {
 
 int memcg_update_all_caches(int num_memcgs);
 
-struct seq_file;
-int cache_show(struct kmem_cache *s, struct seq_file *m);
-void print_slabinfo_header(struct seq_file *m);
-
 /**
  * kmalloc_array - allocate memory for an array.
  * @n: number of elements.

+ 42 - 0
include/linux/swap_cgroup.h

@@ -0,0 +1,42 @@
+#ifndef __LINUX_SWAP_CGROUP_H
+#define __LINUX_SWAP_CGROUP_H
+
+#include <linux/swap.h>
+
+#ifdef CONFIG_MEMCG_SWAP
+
+extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
+					unsigned short old, unsigned short new);
+extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
+extern unsigned short lookup_swap_cgroup_id(swp_entry_t ent);
+extern int swap_cgroup_swapon(int type, unsigned long max_pages);
+extern void swap_cgroup_swapoff(int type);
+
+#else
+
+static inline
+unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
+{
+	return 0;
+}
+
+static inline
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
+{
+	return 0;
+}
+
+static inline int
+swap_cgroup_swapon(int type, unsigned long max_pages)
+{
+	return 0;
+}
+
+static inline void swap_cgroup_swapoff(int type)
+{
+	return;
+}
+
+#endif /* CONFIG_MEMCG_SWAP */
+
+#endif /* __LINUX_SWAP_CGROUP_H */

+ 9 - 17
include/net/sock.h

@@ -54,8 +54,8 @@
 #include <linux/security.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
+#include <linux/page_counter.h>
 #include <linux/memcontrol.h>
-#include <linux/res_counter.h>
 #include <linux/static_key.h>
 #include <linux/aio.h>
 #include <linux/sched.h>
@@ -1062,7 +1062,7 @@ enum cg_proto_flags {
 };
 
 struct cg_proto {
-	struct res_counter	memory_allocated;	/* Current allocated memory. */
+	struct page_counter	memory_allocated;	/* Current allocated memory. */
 	struct percpu_counter	sockets_allocated;	/* Current number of sockets. */
 	int			memory_pressure;
 	long			sysctl_mem[3];
@@ -1214,34 +1214,26 @@ static inline void memcg_memory_allocated_add(struct cg_proto *prot,
 					      unsigned long amt,
 					      int *parent_status)
 {
-	struct res_counter *fail;
-	int ret;
+	page_counter_charge(&prot->memory_allocated, amt);
 
-	ret = res_counter_charge_nofail(&prot->memory_allocated,
-					amt << PAGE_SHIFT, &fail);
-	if (ret < 0)
+	if (page_counter_read(&prot->memory_allocated) >
+	    prot->memory_allocated.limit)
 		*parent_status = OVER_LIMIT;
 }
 
 static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
 					      unsigned long amt)
 {
-	res_counter_uncharge(&prot->memory_allocated, amt << PAGE_SHIFT);
-}
-
-static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
-{
-	u64 ret;
-	ret = res_counter_read_u64(&prot->memory_allocated, RES_USAGE);
-	return ret >> PAGE_SHIFT;
+	page_counter_uncharge(&prot->memory_allocated, amt);
 }
 
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
 	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
-		return memcg_memory_allocated_read(sk->sk_cgrp);
+		return page_counter_read(&sk->sk_cgrp->memory_allocated);
 
 	return atomic_long_read(prot->memory_allocated);
 }
@@ -1255,7 +1247,7 @@ sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
 		/* update the root cgroup regardless */
 		atomic_long_add_return(amt, prot->memory_allocated);
-		return memcg_memory_allocated_read(sk->sk_cgrp);
+		return page_counter_read(&sk->sk_cgrp->memory_allocated);
 	}
 
 	return atomic_long_add_return(amt, prot->memory_allocated);

+ 1 - 0
include/uapi/linux/sysctl.h

@@ -153,6 +153,7 @@ enum
 	KERN_MAX_LOCK_DEPTH=74, /* int: rtmutex's maximum lock depth */
 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
 	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
+	KERN_PANIC_ON_WARN=77, /* int: call panic() in WARN() functions */
 };
 
 

+ 29 - 27
init/Kconfig

@@ -893,14 +893,6 @@ config ARCH_SUPPORTS_INT128
 config ARCH_WANT_NUMA_VARIABLE_LOCALITY
 	bool
 
-config NUMA_BALANCING_DEFAULT_ENABLED
-	bool "Automatically enable NUMA aware memory/task placement"
-	default y
-	depends on NUMA_BALANCING
-	help
-	  If set, automatic NUMA balancing will be enabled if running on a NUMA
-	  machine.
-
 config NUMA_BALANCING
 	bool "Memory placement aware NUMA scheduler"
 	depends on ARCH_SUPPORTS_NUMA_BALANCING
@@ -913,6 +905,14 @@ config NUMA_BALANCING
 
 	  This system will be inactive on UMA systems.
 
+config NUMA_BALANCING_DEFAULT_ENABLED
+	bool "Automatically enable NUMA aware memory/task placement"
+	default y
+	depends on NUMA_BALANCING
+	help
+	  If set, automatic NUMA balancing will be enabled if running on a NUMA
+	  machine.
+
 menuconfig CGROUPS
 	boolean "Control Group support"
 	select KERNFS
@@ -972,32 +972,17 @@ config CGROUP_CPUACCT
 	  Provides a simple Resource Controller for monitoring the
 	  total CPU consumed by the tasks in a cgroup.
 
-config RESOURCE_COUNTERS
-	bool "Resource counters"
-	help
-	  This option enables controller independent resource accounting
-	  infrastructure that works with cgroups.
+config PAGE_COUNTER
+       bool
 
 config MEMCG
 	bool "Memory Resource Controller for Control Groups"
-	depends on RESOURCE_COUNTERS
+	select PAGE_COUNTER
 	select EVENTFD
 	help
 	  Provides a memory resource controller that manages both anonymous
 	  memory and page cache. (See Documentation/cgroups/memory.txt)
 
-	  Note that setting this option increases fixed memory overhead
-	  associated with each page of memory in the system. By this,
-	  8(16)bytes/PAGE_SIZE on 32(64)bit system will be occupied by memory
-	  usage tracking struct at boot. Total amount of this is printed out
-	  at boot.
-
-	  Only enable when you're ok with these trade offs and really
-	  sure you need the memory resource controller. Even when you enable
-	  this, you can set "cgroup_disable=memory" at your boot option to
-	  disable memory resource controller and you can avoid overheads.
-	  (and lose benefits of memory resource controller)
-
 config MEMCG_SWAP
 	bool "Memory Resource Controller Swap Extension"
 	depends on MEMCG && SWAP
@@ -1048,7 +1033,8 @@ config MEMCG_KMEM
 
 config CGROUP_HUGETLB
 	bool "HugeTLB Resource Controller for Control Groups"
-	depends on RESOURCE_COUNTERS && HUGETLB_PAGE
+	depends on HUGETLB_PAGE
+	select PAGE_COUNTER
 	default n
 	help
 	  Provides a cgroup Resource Controller for HugeTLB pages.
@@ -1294,6 +1280,22 @@ source "usr/Kconfig"
 
 endif
 
+config INIT_FALLBACK
+	bool "Fall back to defaults if init= parameter is bad"
+	default y
+	help
+	  If enabled, the kernel will try the default init binaries if an
+	  explicit request from the init= parameter fails.
+
+	  This can have unexpected effects.  For example, booting
+	  with init=/sbin/kiosk_app will run /sbin/init or even /bin/sh
+	  if /sbin/kiosk_app cannot be executed.
+
+	  The default value of Y is consistent with historical behavior.
+	  Selecting N is likely to be more appropriate for most uses,
+	  especially on kiosks and on kernels that are intended to be
+	  run under the control of a script.
+
 config CC_OPTIMIZE_FOR_SIZE
 	bool "Optimize for size"
 	help

+ 6 - 8
init/main.c

@@ -51,7 +51,6 @@
 #include <linux/mempolicy.h>
 #include <linux/key.h>
 #include <linux/buffer_head.h>
-#include <linux/page_cgroup.h>
 #include <linux/debug_locks.h>
 #include <linux/debugobjects.h>
 #include <linux/lockdep.h>
@@ -485,11 +484,6 @@ void __init __weak thread_info_cache_init(void)
  */
 static void __init mm_init(void)
 {
-	/*
-	 * page_cgroup requires contiguous pages,
-	 * bigger than MAX_ORDER unless SPARSEMEM.
-	 */
-	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
 	percpu_init_late();
@@ -627,7 +621,6 @@ asmlinkage __visible void __init start_kernel(void)
 		initrd_start = 0;
 	}
 #endif
-	page_cgroup_init();
 	debug_objects_mem_init();
 	kmemleak_init();
 	setup_per_cpu_pageset();
@@ -959,8 +952,13 @@ static int __ref kernel_init(void *unused)
 		ret = run_init_process(execute_command);
 		if (!ret)
 			return 0;
+#ifndef CONFIG_INIT_FALLBACK
+		panic("Requested init %s failed (error %d).",
+		      execute_command, ret);
+#else
 		pr_err("Failed to execute %s (error %d).  Attempting defaults...\n",
-			execute_command, ret);
+		       execute_command, ret);
+#endif
 	}
 	if (!try_to_run_init_process("/sbin/init") ||
 	    !try_to_run_init_process("/etc/init") ||

+ 0 - 1
kernel/Makefile

@@ -57,7 +57,6 @@ obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
 obj-$(CONFIG_IKCONFIG) += configs.o
-obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
 obj-$(CONFIG_SMP) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o

+ 123 - 122
kernel/exit.c

@@ -118,13 +118,10 @@ static void __exit_signal(struct task_struct *tsk)
 	}
 
 	/*
-	 * Accumulate here the counters for all threads but the group leader
-	 * as they die, so they can be added into the process-wide totals
-	 * when those are taken.  The group leader stays around as a zombie as
-	 * long as there are other threads.  When it gets reaped, the exit.c
-	 * code will add its counts into these totals.  We won't ever get here
-	 * for the group leader, since it will have been the last reference on
-	 * the signal_struct.
+	 * Accumulate here the counters for all threads as they die. We could
+	 * skip the group leader because it is the last user of signal_struct,
+	 * but we want to avoid the race with thread_group_cputime() which can
+	 * see the empty ->thread_head list.
 	 */
 	task_cputime(tsk, &utime, &stime);
 	write_seqlock(&sig->stats_lock);
@@ -462,6 +459,44 @@ static void exit_mm(struct task_struct *tsk)
 	clear_thread_flag(TIF_MEMDIE);
 }
 
+static struct task_struct *find_alive_thread(struct task_struct *p)
+{
+	struct task_struct *t;
+
+	for_each_thread(p, t) {
+		if (!(t->flags & PF_EXITING))
+			return t;
+	}
+	return NULL;
+}
+
+static struct task_struct *find_child_reaper(struct task_struct *father)
+	__releases(&tasklist_lock)
+	__acquires(&tasklist_lock)
+{
+	struct pid_namespace *pid_ns = task_active_pid_ns(father);
+	struct task_struct *reaper = pid_ns->child_reaper;
+
+	if (likely(reaper != father))
+		return reaper;
+
+	reaper = find_alive_thread(father);
+	if (reaper) {
+		pid_ns->child_reaper = reaper;
+		return reaper;
+	}
+
+	write_unlock_irq(&tasklist_lock);
+	if (unlikely(pid_ns == &init_pid_ns)) {
+		panic("Attempted to kill init! exitcode=0x%08x\n",
+			father->signal->group_exit_code ?: father->exit_code);
+	}
+	zap_pid_ns_processes(pid_ns);
+	write_lock_irq(&tasklist_lock);
+
+	return father;
+}
+
 /*
  * When we die, we re-parent all our children, and try to:
  * 1. give them to another thread in our thread group, if such a member exists
@@ -469,58 +504,36 @@ static void exit_mm(struct task_struct *tsk)
  *    child_subreaper for its children (like a service manager)
  * 3. give it to the init process (PID 1) in our pid namespace
  */
-static struct task_struct *find_new_reaper(struct task_struct *father)
-	__releases(&tasklist_lock)
-	__acquires(&tasklist_lock)
+static struct task_struct *find_new_reaper(struct task_struct *father,
+					   struct task_struct *child_reaper)
 {
-	struct pid_namespace *pid_ns = task_active_pid_ns(father);
-	struct task_struct *thread;
+	struct task_struct *thread, *reaper;
 
-	thread = father;
-	while_each_thread(father, thread) {
-		if (thread->flags & PF_EXITING)
-			continue;
-		if (unlikely(pid_ns->child_reaper == father))
-			pid_ns->child_reaper = thread;
+	thread = find_alive_thread(father);
+	if (thread)
 		return thread;
-	}
-
-	if (unlikely(pid_ns->child_reaper == father)) {
-		write_unlock_irq(&tasklist_lock);
-		if (unlikely(pid_ns == &init_pid_ns)) {
-			panic("Attempted to kill init! exitcode=0x%08x\n",
-				father->signal->group_exit_code ?:
-					father->exit_code);
-		}
-
-		zap_pid_ns_processes(pid_ns);
-		write_lock_irq(&tasklist_lock);
-	} else if (father->signal->has_child_subreaper) {
-		struct task_struct *reaper;
 
+	if (father->signal->has_child_subreaper) {
 		/*
-		 * Find the first ancestor marked as child_subreaper.
-		 * Note that the code below checks same_thread_group(reaper,
-		 * pid_ns->child_reaper).  This is what we need to DTRT in a
-		 * PID namespace. However we still need the check above, see
-		 * http://marc.info/?l=linux-kernel&m=131385460420380
+		 * Find the first ->is_child_subreaper ancestor in our pid_ns.
+		 * We start from father to ensure we can not look into another
+		 * namespace, this is safe because all its threads are dead.
 		 */
-		for (reaper = father->real_parent;
-		     reaper != &init_task;
+		for (reaper = father;
+		     !same_thread_group(reaper, child_reaper);
 		     reaper = reaper->real_parent) {
-			if (same_thread_group(reaper, pid_ns->child_reaper))
+			/* call_usermodehelper() descendants need this check */
+			if (reaper == &init_task)
 				break;
 			if (!reaper->signal->is_child_subreaper)
 				continue;
-			thread = reaper;
-			do {
-				if (!(thread->flags & PF_EXITING))
-					return reaper;
-			} while_each_thread(reaper, thread);
+			thread = find_alive_thread(reaper);
+			if (thread)
+				return thread;
 		}
 	}
 
-	return pid_ns->child_reaper;
+	return child_reaper;
 }
 
 /*
@@ -529,15 +542,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
 static void reparent_leader(struct task_struct *father, struct task_struct *p,
 				struct list_head *dead)
 {
-	list_move_tail(&p->sibling, &p->real_parent->children);
-
-	if (p->exit_state == EXIT_DEAD)
-		return;
-	/*
-	 * If this is a threaded reparent there is no need to
-	 * notify anyone anything has happened.
-	 */
-	if (same_thread_group(p->real_parent, father))
+	if (unlikely(p->exit_state == EXIT_DEAD))
 		return;
 
 	/* We don't want people slaying init. */
@@ -548,49 +553,53 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
 	    p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
 		if (do_notify_parent(p, p->exit_signal)) {
 			p->exit_state = EXIT_DEAD;
-			list_move_tail(&p->sibling, dead);
+			list_add(&p->ptrace_entry, dead);
 		}
 	}
 
 	kill_orphaned_pgrp(p, father);
 }
 
-static void forget_original_parent(struct task_struct *father)
+/*
+ * This does two things:
+ *
+ * A.  Make init inherit all the child processes
+ * B.  Check to see if any process groups have become orphaned
+ *	as a result of our exiting, and if they have any stopped
+ *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
+ */
+static void forget_original_parent(struct task_struct *father,
+					struct list_head *dead)
 {
-	struct task_struct *p, *n, *reaper;
-	LIST_HEAD(dead_children);
+	struct task_struct *p, *t, *reaper;
 
-	write_lock_irq(&tasklist_lock);
-	/*
-	 * Note that exit_ptrace() and find_new_reaper() might
-	 * drop tasklist_lock and reacquire it.
-	 */
-	exit_ptrace(father);
-	reaper = find_new_reaper(father);
+	if (unlikely(!list_empty(&father->ptraced)))
+		exit_ptrace(father, dead);
 
-	list_for_each_entry_safe(p, n, &father->children, sibling) {
-		struct task_struct *t = p;
+	/* Can drop and reacquire tasklist_lock */
+	reaper = find_child_reaper(father);
+	if (list_empty(&father->children))
+		return;
 
-		do {
+	reaper = find_new_reaper(father, reaper);
+	list_for_each_entry(p, &father->children, sibling) {
+		for_each_thread(p, t) {
 			t->real_parent = reaper;
-			if (t->parent == father) {
-				BUG_ON(t->ptrace);
+			BUG_ON((!t->ptrace) != (t->parent == father));
+			if (likely(!t->ptrace))
 				t->parent = t->real_parent;
-			}
 			if (t->pdeath_signal)
 				group_send_sig_info(t->pdeath_signal,
 						    SEND_SIG_NOINFO, t);
-		} while_each_thread(p, t);
-		reparent_leader(father, p, &dead_children);
-	}
-	write_unlock_irq(&tasklist_lock);
-
-	BUG_ON(!list_empty(&father->children));
-
-	list_for_each_entry_safe(p, n, &dead_children, sibling) {
-		list_del_init(&p->sibling);
-		release_task(p);
+		}
+		/*
+		 * If this is a threaded reparent there is no need to
+		 * notify anyone anything has happened.
+		 */
+		if (!same_thread_group(reaper, father))
+			reparent_leader(father, p, dead);
 	}
+	list_splice_tail_init(&father->children, &reaper->children);
 }
 
 /*
@@ -600,18 +609,12 @@ static void forget_original_parent(struct task_struct *father)
 static void exit_notify(struct task_struct *tsk, int group_dead)
 {
 	bool autoreap;
-
-	/*
-	 * This does two things:
-	 *
-	 * A.  Make init inherit all the child processes
-	 * B.  Check to see if any process groups have become orphaned
-	 *	as a result of our exiting, and if they have any stopped
-	 *	jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
-	 */
-	forget_original_parent(tsk);
+	struct task_struct *p, *n;
+	LIST_HEAD(dead);
 
 	write_lock_irq(&tasklist_lock);
+	forget_original_parent(tsk, &dead);
+
 	if (group_dead)
 		kill_orphaned_pgrp(tsk->group_leader, NULL);
 
@@ -629,15 +632,18 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
 	}
 
 	tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
+	if (tsk->exit_state == EXIT_DEAD)
+		list_add(&tsk->ptrace_entry, &dead);
 
 	/* mt-exec, de_thread() is waiting for group leader */
 	if (unlikely(tsk->signal->notify_count < 0))
 		wake_up_process(tsk->signal->group_exit_task);
 	write_unlock_irq(&tasklist_lock);
 
-	/* If the process is dead, release it - nobody will wait for it */
-	if (autoreap)
-		release_task(tsk);
+	list_for_each_entry_safe(p, n, &dead, ptrace_entry) {
+		list_del_init(&p->ptrace_entry);
+		release_task(p);
+	}
 }
 
 #ifdef CONFIG_DEBUG_STACK_USAGE
@@ -982,8 +988,7 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p,
  */
 static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 {
-	unsigned long state;
-	int retval, status, traced;
+	int state, retval, status;
 	pid_t pid = task_pid_vnr(p);
 	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	struct siginfo __user *infop;
@@ -1008,21 +1013,25 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		}
 		return wait_noreap_copyout(wo, p, pid, uid, why, status);
 	}
-
-	traced = ptrace_reparented(p);
 	/*
 	 * Move the task's state to DEAD/TRACE, only one thread can do this.
 	 */
-	state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD;
+	state = (ptrace_reparented(p) && thread_group_leader(p)) ?
+		EXIT_TRACE : EXIT_DEAD;
 	if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
 		return 0;
 	/*
-	 * It can be ptraced but not reparented, check
-	 * thread_group_leader() to filter out sub-threads.
+	 * We own this thread, nobody else can reap it.
 	 */
-	if (likely(!traced) && thread_group_leader(p)) {
-		struct signal_struct *psig;
-		struct signal_struct *sig;
+	read_unlock(&tasklist_lock);
+	sched_annotate_sleep();
+
+	/*
+	 * Check thread_group_leader() to exclude the traced sub-threads.
+	 */
+	if (state == EXIT_DEAD && thread_group_leader(p)) {
+		struct signal_struct *sig = p->signal;
+		struct signal_struct *psig = current->signal;
 		unsigned long maxrss;
 		cputime_t tgutime, tgstime;
 
@@ -1034,21 +1043,20 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * accumulate in the parent's signal_struct c* fields.
 		 *
 		 * We don't bother to take a lock here to protect these
-		 * p->signal fields, because they are only touched by
-		 * __exit_signal, which runs with tasklist_lock
-		 * write-locked anyway, and so is excluded here.  We do
-		 * need to protect the access to parent->signal fields,
-		 * as other threads in the parent group can be right
-		 * here reaping other children at the same time.
+		 * p->signal fields because the whole thread group is dead
+		 * and nobody can change them.
+		 *
+		 * psig->stats_lock also protects us from our sub-theads
+		 * which can reap other children at the same time. Until
+		 * we change k_getrusage()-like users to rely on this lock
+		 * we have to take ->siglock as well.
 		 *
 		 * We use thread_group_cputime_adjusted() to get times for
 		 * the thread group, which consolidates times for all threads
 		 * in the group including the group leader.
 		 */
 		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-		spin_lock_irq(&p->real_parent->sighand->siglock);
-		psig = p->real_parent->signal;
-		sig = p->signal;
+		spin_lock_irq(&current->sighand->siglock);
 		write_seqlock(&psig->stats_lock);
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
@@ -1073,16 +1081,9 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
 		write_sequnlock(&psig->stats_lock);
-		spin_unlock_irq(&p->real_parent->sighand->siglock);
+		spin_unlock_irq(&current->sighand->siglock);
 	}
 
-	/*
-	 * Now we are sure this task is interesting, and no other
-	 * thread can reap it because we its state == DEAD/TRACE.
-	 */
-	read_unlock(&tasklist_lock);
-	sched_annotate_sleep();
-
 	retval = wo->wo_rusage
 		? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0;
 	status = (p->signal->flags & SIGNAL_GROUP_EXIT)

+ 5 - 38
kernel/kmod.c

@@ -47,13 +47,6 @@ extern int max_threads;
 
 static struct workqueue_struct *khelper_wq;
 
-/*
- * kmod_thread_locker is used for deadlock avoidance.  There is no explicit
- * locking to protect this global - it is private to the singleton khelper
- * thread and should only ever be modified by that thread.
- */
-static const struct task_struct *kmod_thread_locker;
-
 #define CAP_BSET	(void *)1
 #define CAP_PI		(void *)2
 
@@ -223,7 +216,6 @@ static void umh_complete(struct subprocess_info *sub_info)
 static int ____call_usermodehelper(void *data)
 {
 	struct subprocess_info *sub_info = data;
-	int wait = sub_info->wait & ~UMH_KILLABLE;
 	struct cred *new;
 	int retval;
 
@@ -267,20 +259,13 @@ static int ____call_usermodehelper(void *data)
 out:
 	sub_info->retval = retval;
 	/* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */
-	if (wait != UMH_WAIT_PROC)
+	if (!(sub_info->wait & UMH_WAIT_PROC))
 		umh_complete(sub_info);
 	if (!retval)
 		return 0;
 	do_exit(0);
 }
 
-static int call_helper(void *data)
-{
-	/* Worker thread started blocking khelper thread. */
-	kmod_thread_locker = current;
-	return ____call_usermodehelper(data);
-}
-
 /* Keventd can't block, but this (a child) can. */
 static int wait_for_helper(void *data)
 {
@@ -323,21 +308,14 @@ static void __call_usermodehelper(struct work_struct *work)
 {
 	struct subprocess_info *sub_info =
 		container_of(work, struct subprocess_info, work);
-	int wait = sub_info->wait & ~UMH_KILLABLE;
 	pid_t pid;
 
-	/* CLONE_VFORK: wait until the usermode helper has execve'd
-	 * successfully We need the data structures to stay around
-	 * until that is done.  */
-	if (wait == UMH_WAIT_PROC)
+	if (sub_info->wait & UMH_WAIT_PROC)
 		pid = kernel_thread(wait_for_helper, sub_info,
 				    CLONE_FS | CLONE_FILES | SIGCHLD);
-	else {
-		pid = kernel_thread(call_helper, sub_info,
-				    CLONE_VFORK | SIGCHLD);
-		/* Worker thread stopped blocking khelper thread. */
-		kmod_thread_locker = NULL;
-	}
+	else
+		pid = kernel_thread(____call_usermodehelper, sub_info,
+				    SIGCHLD);
 
 	if (pid < 0) {
 		sub_info->retval = pid;
@@ -570,17 +548,6 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 		retval = -EBUSY;
 		goto out;
 	}
-	/*
-	 * Worker thread must not wait for khelper thread at below
-	 * wait_for_completion() if the thread was created with CLONE_VFORK
-	 * flag, for khelper thread is already waiting for the thread at
-	 * wait_for_completion() in do_fork().
-	 */
-	if (wait != UMH_NO_WAIT && current == kmod_thread_locker) {
-		retval = -EBUSY;
-		goto out;
-	}
-
 	/*
 	 * Set the completion pointer only if there is a waiter.
 	 * This makes it possible to use umh_complete to free

+ 13 - 0
kernel/panic.c

@@ -33,6 +33,7 @@ static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 static bool crash_kexec_post_notifiers;
+int panic_on_warn __read_mostly;
 
 int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
@@ -428,6 +429,17 @@ static void warn_slowpath_common(const char *file, int line, void *caller,
 	if (args)
 		vprintk(args->fmt, args->args);
 
+	if (panic_on_warn) {
+		/*
+		 * This thread may hit another WARN() in the panic path.
+		 * Resetting this prevents additional WARN() from panicking the
+		 * system on this thread.  Other threads are blocked by the
+		 * panic_mutex in panic().
+		 */
+		panic_on_warn = 0;
+		panic("panic_on_warn set ...\n");
+	}
+
 	print_modules();
 	dump_stack();
 	print_oops_end_marker();
@@ -485,6 +497,7 @@ EXPORT_SYMBOL(__stack_chk_fail);
 
 core_param(panic, panic_timeout, int, 0644);
 core_param(pause_on_oops, pause_on_oops, int, 0644);
+core_param(panic_on_warn, panic_on_warn, int, 0644);
 
 static int __init setup_crash_kexec_post_notifiers(char *s)
 {

Some files were not shown because too many files changed in this diff