浏览代码

Merge branches 'debug', 'fixes', 'l2c' (early part), 'misc' and 'sa1100' into for-next

Russell King 10 年之前
父节点
当前提交
ed8f8ce38d
共有 51 个文件被更改，包括 956 次插入和 1257 次删除
  1. 20 0
      Documentation/ABI/testing/sysfs-bus-amba
  2. 10 0
      Documentation/devicetree/bindings/arm/l2cc.txt
  3. 1 0
      arch/arm/Kconfig
  4. 31 10
      arch/arm/boot/compressed/head.S
  5. 9 0
      arch/arm/boot/dts/exynos4210.dtsi
  6. 14 0
      arch/arm/boot/dts/exynos4x12.dtsi
  7. 20 0
      arch/arm/include/asm/bitrev.h
  8. 14 1
      arch/arm/include/asm/compiler.h
  9. 3 0
      arch/arm/include/asm/outercache.h
  10. 1 0
      arch/arm/include/uapi/asm/unistd.h
  11. 1 0
      arch/arm/kernel/calls.S
  12. 7 6
      arch/arm/kernel/entry-header.S
  13. 2 0
      arch/arm/kernel/entry-v7m.S
  14. 8 1
      arch/arm/kernel/head.S
  15. 2 1
      arch/arm/kernel/irq.c
  16. 8 2
      arch/arm/kernel/perf_event.c
  17. 5 2
      arch/arm/kernel/setup.c
  18. 0 4
      arch/arm/kernel/suspend.c
  19. 2 13
      arch/arm/lib/Makefile
  20. 0 564
      arch/arm/lib/uaccess.S
  21. 50 0
      arch/arm/mach-exynos/firmware.c
  22. 46 0
      arch/arm/mach-exynos/sleep.S
  23. 6 0
      arch/arm/mach-omap2/board-generic.c
  24. 8 0
      arch/arm/mach-omap2/common.h
  25. 1 15
      arch/arm/mach-omap2/omap4-common.c
  26. 2 2
      arch/arm/mach-qcom/platsmp.c
  27. 1 1
      arch/arm/mach-sa1100/Makefile
  28. 12 0
      arch/arm/mach-sa1100/clock.c
  29. 1 2
      arch/arm/mach-sa1100/collie.c
  30. 6 0
      arch/arm/mach-sa1100/generic.c
  31. 42 31
      arch/arm/mach-sa1100/include/mach/irqs.h
  32. 4 199
      arch/arm/mach-sa1100/irq.c
  33. 1 0
      arch/arm/mach-sa1100/pm.c
  34. 0 139
      arch/arm/mach-sa1100/time.c
  35. 1 0
      arch/arm/mm/Kconfig
  36. 230 209
      arch/arm/mm/cache-l2x0.c
  37. 11 15
      arch/arm/mm/context.c
  38. 3 0
      arch/arm/mm/dma-mapping.c
  39. 2 7
      arch/arm/mm/dump.c
  40. 3 6
      arch/arm/mm/init.c
  41. 2 2
      arch/arm/mm/mmu.c
  42. 1 0
      arch/arm64/Kconfig
  43. 19 0
      arch/arm64/include/asm/bitrev.h
  44. 47 0
      drivers/amba/bus.c
  45. 7 0
      drivers/clocksource/Kconfig
  46. 1 1
      drivers/clocksource/Makefile
  47. 198 1
      drivers/gpio/gpio-sa1100.c
  48. 9 4
      include/linux/amba/bus.h
  49. 73 4
      include/linux/bitrev.h
  50. 9 0
      lib/Kconfig
  51. 2 15
      lib/bitrev.c

+ 20 - 0
Documentation/ABI/testing/sysfs-bus-amba

@@ -0,0 +1,20 @@
+What:		/sys/bus/amba/devices/.../driver_override
+Date:		September 2014
+Contact:	Antonios Motakis <a.motakis@virtualopensystems.com>
+Description:
+		This file allows the driver for a device to be specified which
+		will override standard OF, ACPI, ID table, and name matching.
+		When specified, only a driver with a name matching the value
+		written to driver_override will have an opportunity to bind to
+		the device. The override is specified by writing a string to the
+		driver_override file (echo vfio-amba > driver_override) and may
+		be cleared with an empty string (echo > driver_override).
+		Clearing it returns the device to standard matching rules.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver. If no driver with a
+		matching name is currently loaded in the kernel, the device will
+		not bind to any driver. This also allows devices to opt-out of
+		driver binding using a driver_override name such as "none".
+		Only a single driver may be specified in the override; there is
+		no support for parsing delimiters.

+ 10 - 0
Documentation/devicetree/bindings/arm/l2cc.txt

@@ -57,6 +57,16 @@ Optional properties:
 - cache-id-part: cache id part number to be used if it is not present
   on hardware
 - wt-override: If present then L2 is forced to Write through mode
+- arm,double-linefill : Override double linefill enable setting. Enable if
+  non-zero, disable if zero.
+- arm,double-linefill-incr : Override double linefill on INCR read. Enable
+  if non-zero, disable if zero.
+- arm,double-linefill-wrap : Override double linefill on WRAP read. Enable
+  if non-zero, disable if zero.
+- arm,prefetch-drop : Override prefetch drop enable setting. Enable if non-zero,
+  disable if zero.
+- arm,prefetch-offset : Override prefetch offset value. Valid values are
+  0-7, 15, 23, and 31.
 
 Example:
 

+ 1 - 0
arch/arm/Kconfig

@@ -29,6 +29,7 @@ config ARM
 	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT)
+	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)

+ 31 - 10
arch/arm/boot/compressed/head.S

@@ -178,7 +178,7 @@ not_angel:
 
 		/*
 		 * Set up a page table only if it won't overwrite ourself.
-		 * That means r4 < pc && r4 - 16k page directory > &_end.
+		 * That means r4 < pc || r4 - 16k page directory > &_end.
 		 * Given that r4 > &_end is most unfrequent, we add a rough
 		 * additional 1MB of room for a possible appended DTB.
 		 */
@@ -263,16 +263,37 @@ restart:	adr	r0, LC0
 		 * OK... Let's do some funky business here.
 		 * If we do have a DTB appended to zImage, and we do have
 		 * an ATAG list around, we want the later to be translated
-		 * and folded into the former here.  To be on the safe side,
-		 * let's temporarily move  the stack away into the malloc
-		 * area.  No GOT fixup has occurred yet, but none of the
-		 * code we're about to call uses any global variable.
+		 * and folded into the former here. No GOT fixup has occurred
+		 * yet, but none of the code we're about to call uses any
+		 * global variable.
 		*/
-		add	sp, sp, #0x10000
+
+		/* Get the initial DTB size */
+		ldr	r5, [r6, #4]
+#ifndef __ARMEB__
+		/* convert to little endian */
+		eor	r1, r5, r5, ror #16
+		bic	r1, r1, #0x00ff0000
+		mov	r5, r5, ror #8
+		eor	r5, r5, r1, lsr #8
+#endif
+		/* 50% DTB growth should be good enough */
+		add	r5, r5, r5, lsr #1
+		/* preserve 64-bit alignment */
+		add	r5, r5, #7
+		bic	r5, r5, #7
+		/* clamp to 32KB min and 1MB max */
+		cmp	r5, #(1 << 15)
+		movlo	r5, #(1 << 15)
+		cmp	r5, #(1 << 20)
+		movhi	r5, #(1 << 20)
+		/* temporarily relocate the stack past the DTB work space */
+		add	sp, sp, r5
+
 		stmfd	sp!, {r0-r3, ip, lr}
 		mov	r0, r8
 		mov	r1, r6
-		sub	r2, sp, r6
+		mov	r2, r5
 		bl	atags_to_fdt
 
 		/*
@@ -285,11 +306,11 @@ restart:	adr	r0, LC0
 		bic	r0, r0, #1
 		add	r0, r0, #0x100
 		mov	r1, r6
-		sub	r2, sp, r6
+		mov	r2, r5
 		bleq	atags_to_fdt
 
 		ldmfd	sp!, {r0-r3, ip, lr}
-		sub	sp, sp, #0x10000
+		sub	sp, sp, r5
 #endif
 
 		mov	r8, r6			@ use the appended device tree
@@ -306,7 +327,7 @@ restart:	adr	r0, LC0
 		subs	r1, r5, r1
 		addhi	r9, r9, r1
 
-		/* Get the dtb's size */
+		/* Get the current DTB size */
 		ldr	r5, [r6, #4]
 #ifndef __ARMEB__
 		/* convert r5 (dtb size) to little endian */

+ 9 - 0
arch/arm/boot/dts/exynos4210.dtsi

@@ -81,6 +81,15 @@
 		reg = <0x10023CA0 0x20>;
 	};
 
+	l2c: l2-cache-controller@10502000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x10502000 0x1000>;
+		cache-unified;
+		cache-level = <2>;
+		arm,tag-latency = <2 2 1>;
+		arm,data-latency = <2 2 1>;
+	};
+
 	gic: interrupt-controller@10490000 {
 		cpu-offset = <0x8000>;
 	};

+ 14 - 0
arch/arm/boot/dts/exynos4x12.dtsi

@@ -54,6 +54,20 @@
 		reg = <0x10023CA0 0x20>;
 	};
 
+	l2c: l2-cache-controller@10502000 {
+		compatible = "arm,pl310-cache";
+		reg = <0x10502000 0x1000>;
+		cache-unified;
+		cache-level = <2>;
+		arm,tag-latency = <2 2 1>;
+		arm,data-latency = <3 2 1>;
+		arm,double-linefill = <1>;
+		arm,double-linefill-incr = <0>;
+		arm,double-linefill-wrap = <1>;
+		arm,prefetch-drop = <1>;
+		arm,prefetch-offset = <7>;
+	};
+
 	clock: clock-controller@10030000 {
 		compatible = "samsung,exynos4412-clock";
 		reg = <0x10030000 0x20000>;

+ 20 - 0
arch/arm/include/asm/bitrev.h

@@ -0,0 +1,20 @@
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	__asm__ ("rbit %0, %1" : "=r" (x) : "r" (x));
+	return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif

+ 14 - 1
arch/arm/include/asm/compiler.h

@@ -8,8 +8,21 @@
  * This string is meant to be concatenated with the inline asm string and
  * will cause compilation to stop on mismatch.
  * (for details, see gcc PR 15089)
+ * For compatibility with clang, we have to specifically take the equivalence
+ * of 'r11' <-> 'fp' and 'r12' <-> 'ip' into account as well.
  */
-#define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
+#define __asmeq(x, y)				\
+	".ifnc " x "," y "; "			\
+	  ".ifnc " x y ",fpr11; " 		\
+	    ".ifnc " x y ",r11fp; "		\
+	      ".ifnc " x y ",ipr12; " 		\
+	        ".ifnc " x y ",r12ip; "		\
+	          ".err; "			\
+	        ".endif; "			\
+	      ".endif; "			\
+	    ".endif; "				\
+	  ".endif; "				\
+	".endif\n\t"
 
 
 #endif /* __ASM_ARM_COMPILER_H */

+ 3 - 0
arch/arm/include/asm/outercache.h

@@ -23,6 +23,8 @@
 
 #include <linux/types.h>
 
+struct l2x0_regs;
+
 struct outer_cache_fns {
 	void (*inv_range)(unsigned long, unsigned long);
 	void (*clean_range)(unsigned long, unsigned long);
@@ -36,6 +38,7 @@ struct outer_cache_fns {
 
 	/* This is an ARM L2C thing */
 	void (*write_sec)(unsigned long, unsigned);
+	void (*configure)(const struct l2x0_regs *);
 };
 
 extern struct outer_cache_fns outer_cache;

+ 1 - 0
arch/arm/include/uapi/asm/unistd.h

@@ -413,6 +413,7 @@
 #define __NR_getrandom			(__NR_SYSCALL_BASE+384)
 #define __NR_memfd_create		(__NR_SYSCALL_BASE+385)
 #define __NR_bpf			(__NR_SYSCALL_BASE+386)
+#define __NR_execveat			(__NR_SYSCALL_BASE+387)
 
 /*
  * The following SWIs are ARM private.

+ 1 - 0
arch/arm/kernel/calls.S

@@ -396,6 +396,7 @@
 		CALL(sys_getrandom)
 /* 385 */	CALL(sys_memfd_create)
 		CALL(sys_bpf)
+		CALL(sys_execveat)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted

+ 7 - 6
arch/arm/kernel/entry-header.S

@@ -253,21 +253,22 @@
 	.endm
 
 	.macro	restore_user_regs, fast = 0, offset = 0
-	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
-	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
+	mov	r2, sp
+	ldr	r1, [r2, #\offset + S_PSR]	@ get calling cpsr
+	ldr	lr, [r2, #\offset + S_PC]!	@ get pc
 	msr	spsr_cxsf, r1			@ save in spsr_svc
 #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K)
 	@ We must avoid clrex due to Cortex-A15 erratum #830321
-	strex	r1, r2, [sp]			@ clear the exclusive monitor
+	strex	r1, r2, [r2]			@ clear the exclusive monitor
 #endif
 	.if	\fast
-	ldmdb	sp, {r1 - lr}^			@ get calling r1 - lr
+	ldmdb	r2, {r1 - lr}^			@ get calling r1 - lr
 	.else
-	ldmdb	sp, {r0 - lr}^			@ get calling r0 - lr
+	ldmdb	r2, {r0 - lr}^			@ get calling r0 - lr
 	.endif
 	mov	r0, r0				@ ARMv5T and earlier require a nop
 						@ after ldm {}^
-	add	sp, sp, #S_FRAME_SIZE - S_PC
+	add	sp, sp, #\offset + S_FRAME_SIZE
 	movs	pc, lr				@ return & move spsr_svc into cpsr
 	.endm
 

+ 2 - 0
arch/arm/kernel/entry-v7m.S

@@ -22,10 +22,12 @@
 
 __invalid_entry:
 	v7m_exception_entry
+#ifdef CONFIG_PRINTK
 	adr	r0, strerr
 	mrs	r1, ipsr
 	mov	r2, lr
 	bl	printk
+#endif
 	mov	r0, sp
 	bl	show_regs
 1:	b	1b

+ 8 - 1
arch/arm/kernel/head.S

@@ -346,6 +346,12 @@ __turn_mmu_on_loc:
 
 #if defined(CONFIG_SMP)
 	.text
+ENTRY(secondary_startup_arm)
+	.arm
+ THUMB(	adr	r9, BSYM(1f)	)	@ Kernel is entered in ARM.
+ THUMB(	bx	r9		)	@ If this is a Thumb-2 kernel,
+ THUMB(	.thumb			)	@ switch to Thumb now.
+ THUMB(1:			)
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
@@ -385,6 +391,7 @@ ENTRY(secondary_startup)
  THUMB(	add	r12, r10, #PROCINFO_INITFUNC	)
  THUMB(	ret	r12				)
 ENDPROC(secondary_startup)
+ENDPROC(secondary_startup_arm)
 
 	/*
 	 * r6  = &secondary_data
@@ -586,7 +593,7 @@ __fixup_pv_table:
 	add	r5, r5, r3	@ adjust table end address
 	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
 	add	r7, r7, r3	@ adjust __pv_offset address
-	mov	r0, r8, lsr #12	@ convert to PFN
+	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
 	str	r0, [r6]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
 	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions

+ 2 - 1
arch/arm/kernel/irq.c

@@ -109,7 +109,8 @@ void __init init_IRQ(void)
 
 	if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) &&
 	    (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) {
-		outer_cache.write_sec = machine_desc->l2c_write_sec;
+		if (!outer_cache.write_sec)
+			outer_cache.write_sec = machine_desc->l2c_write_sec;
 		ret = l2x0_of_init(machine_desc->l2c_aux_val,
 				   machine_desc->l2c_aux_mask);
 		if (ret)

+ 8 - 2
arch/arm/kernel/perf_event.c

@@ -116,8 +116,14 @@ int armpmu_event_set_period(struct perf_event *event)
 		ret = 1;
 	}
 
-	if (left > (s64)armpmu->max_period)
-		left = armpmu->max_period;
+	/*
+	 * Limit the maximum period to prevent the counter value
+	 * from overtaking the one we are about to program. In
+	 * effect we are reducing max_period to account for
+	 * interrupt latency (and we are being very conservative).
+	 */
+	if (left > (armpmu->max_period >> 1))
+		left = armpmu->max_period >> 1;
 
 	local64_set(&hwc->prev_count, (u64)-left);
 

+ 5 - 2
arch/arm/kernel/setup.c

@@ -657,10 +657,13 @@ int __init arm_add_memory(u64 start, u64 size)
 
 	/*
 	 * Ensure that start/size are aligned to a page boundary.
-	 * Size is appropriately rounded down, start is rounded up.
+	 * Size is rounded down, start is rounded up.
 	 */
-	size -= start & ~PAGE_MASK;
 	aligned_start = PAGE_ALIGN(start);
+	if (aligned_start > start + size)
+		size = 0;
+	else
+		size -= aligned_start - start;
 
 #ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT
 	if (aligned_start > ULONG_MAX) {

+ 0 - 4
arch/arm/kernel/suspend.c

@@ -14,10 +14,6 @@ extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid);
 extern void cpu_resume_mmu(void);
 
 #ifdef CONFIG_MMU
-/*
- * Hide the first two arguments to __cpu_suspend - these are an implementation
- * detail which platform code shouldn't have to know about.
- */
 int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 {
 	struct mm_struct *mm = current->active_mm;

+ 2 - 13
arch/arm/lib/Makefile

@@ -15,19 +15,8 @@ lib-y		:= backtrace.o changebit.o csumipv6.o csumpartial.o   \
 		   io-readsb.o io-writesb.o io-readsl.o io-writesl.o  \
 		   call_with_stack.o bswapsdi2.o
 
-mmu-y	:= clear_user.o copy_page.o getuser.o putuser.o
-
-# the code in uaccess.S is not preemption safe and
-# probably faster on ARMv3 only
-ifeq ($(CONFIG_PREEMPT),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
-else
-ifneq ($(CONFIG_CPU_32v3),y)
-  mmu-y	+= copy_from_user.o copy_to_user.o
-else
-  mmu-y	+= uaccess.o
-endif
-endif
+mmu-y		:= clear_user.o copy_page.o getuser.o putuser.o       \
+		   copy_from_user.o copy_to_user.o
 
 # using lib_ here won't override already available weak symbols
 obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o

+ 0 - 564
arch/arm/lib/uaccess.S

@@ -1,564 +0,0 @@
-/*
- *  linux/arch/arm/lib/uaccess.S
- *
- *  Copyright (C) 1995, 1996,1997,1998 Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *  Routines to block copy data to/from user memory
- *   These are highly optimised both for the 4k page size
- *   and for various alignments.
- */
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-#include <asm/errno.h>
-#include <asm/domain.h>
-
-		.text
-
-#define PAGE_SHIFT 12
-
-/* Prototype: int __copy_to_user(void *to, const char *from, size_t n)
- * Purpose  : copy a block to user memory from kernel memory
- * Params   : to   - user memory
- *          : from - kernel memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-
-.Lc2u_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		sub	r2, r2, ip
-		b	.Lc2u_dest_aligned
-
-ENTRY(__copy_to_user)
-		stmfd	sp!, {r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lc2u_not_enough
-		ands	ip, r0, #3
-		bne	.Lc2u_dest_not_aligned
-.Lc2u_dest_aligned:
-
-		ands	ip, r1, #3
-		bne	.Lc2u_src_not_aligned
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lc2u_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_0nowords
-		ldr	r3, [r1], #4
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lc2u_0rem8lp
-
-.Lc2u_0cpy8lp:	ldmia	r1!, {r3 - r6}
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		ldmia	r1!, {r3 - r6}
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_0cpy8lp
-
-.Lc2u_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}
-		stmgeia	r0!, {r3 - r6}			@ Shouldnt fault
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		ldrne	r3, [r1], #4
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_0fupi
-.Lc2u_0nowords:	teq	ip, #0
-		beq	.Lc2u_finished
-.Lc2u_nowords:	cmp	ip, #2
-		ldrb	r3, [r1], #1
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #1
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_not_enough:
-		movs	ip, r2
-		bne	.Lc2u_nowords
-.Lc2u_finished:	mov	r0, #0
-		ldmfd	sp!, {r2, r4 - r7, pc}
-
-.Lc2u_src_not_aligned:
-		bic	r1, r1, #3
-		ldr	r7, [r1], #4
-		cmp	ip, #2
-		bgt	.Lc2u_3fupi
-		beq	.Lc2u_2fupi
-.Lc2u_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_1nowords
-		mov	r3, r7, lspull #8
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #24
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_1rem8lp
-
-.Lc2u_1cpy8lp:	mov	r3, r7, lspull #8
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #24
-		mov	r4, r4, lspull #8
-		orr	r4, r4, r5, lspush #24
-		mov	r5, r5, lspull #8
-		orr	r5, r5, r6, lspush #24
-		mov	r6, r6, lspull #8
-		orr	r6, r6, r7, lspush #24
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_1cpy8lp
-
-.Lc2u_1rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #8
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #24
-		movne	r4, r4, lspull #8
-		orrne	r4, r4, r7, lspush #24
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #8
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #24
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_1fupi
-.Lc2u_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_2
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		movgt	r3, r7, get_byte_3
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_2nowords
-		mov	r3, r7, lspull #16
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #16
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_2rem8lp
-
-.Lc2u_2cpy8lp:	mov	r3, r7, lspull #16
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #16
-		mov	r4, r4, lspull #16
-		orr	r4, r4, r5, lspush #16
-		mov	r5, r5, lspull #16
-		orr	r5, r5, r6, lspush #16
-		mov	r6, r6, lspull #16
-		orr	r6, r6, r7, lspush #16
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_2cpy8lp
-
-.Lc2u_2rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #16
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #16
-		movne	r4, r4, lspull #16
-		orrne	r4, r4, r7, lspush #16
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #16
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #16
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_2fupi
-.Lc2u_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		movge	r3, r7, get_byte_3
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-
-.Lc2u_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lc2u_3nowords
-		mov	r3, r7, lspull #24
-		ldr	r7, [r1], #4
-		orr	r3, r3, r7, lspush #8
-USER(	TUSER(	str)	r3, [r0], #4)			@ May fault
-		mov	ip, r0, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lc2u_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lc2u_3rem8lp
-
-.Lc2u_3cpy8lp:	mov	r3, r7, lspull #24
-		ldmia	r1!, {r4 - r7}
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #8
-		mov	r4, r4, lspull #24
-		orr	r4, r4, r5, lspush #8
-		mov	r5, r5, lspull #24
-		orr	r5, r5, r6, lspush #8
-		mov	r6, r6, lspull #24
-		orr	r6, r6, r7, lspush #8
-		stmia	r0!, {r3 - r6}			@ Shouldnt fault
-		bpl	.Lc2u_3cpy8lp
-
-.Lc2u_3rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #24
-		ldmneia	r1!, {r4, r7}
-		orrne	r3, r3, r4, lspush #8
-		movne	r4, r4, lspull #24
-		orrne	r4, r4, r7, lspush #8
-		stmneia	r0!, {r3 - r4}			@ Shouldnt fault
-		tst	ip, #4
-		movne	r3, r7, lspull #24
-		ldrne	r7, [r1], #4
-		orrne	r3, r3, r7, lspush #8
-	TUSER(	strne) r3, [r0], #4			@ Shouldnt fault
-		ands	ip, ip, #3
-		beq	.Lc2u_3fupi
-.Lc2u_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lc2u_finished
-		cmp	ip, #2
-USER(	TUSER(	strb)	r3, [r0], #1)			@ May fault
-		ldrgeb	r3, [r1], #1
-USER(	TUSER(	strgeb) r3, [r0], #1)			@ May fault
-		ldrgtb	r3, [r1], #0
-USER(	TUSER(	strgtb) r3, [r0], #1)			@ May fault
-		b	.Lc2u_finished
-ENDPROC(__copy_to_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-9001:		ldmfd	sp!, {r0, r4 - r7, pc}
-		.popsection
-
-/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n);
- * Purpose  : copy a block from user memory to kernel memory
- * Params   : to   - kernel memory
- *          : from - user memory
- *          : n    - number of bytes to copy
- * Returns  : Number of bytes NOT copied.
- */
-.Lcfu_dest_not_aligned:
-		rsb	ip, ip, #4
-		cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		sub	r2, r2, ip
-		b	.Lcfu_dest_aligned
-
-ENTRY(__copy_from_user)
-		stmfd	sp!, {r0, r2, r4 - r7, lr}
-		cmp	r2, #4
-		blt	.Lcfu_not_enough
-		ands	ip, r0, #3
-		bne	.Lcfu_dest_not_aligned
-.Lcfu_dest_aligned:
-		ands	ip, r1, #3
-		bne	.Lcfu_src_not_aligned
-
-/*
- * Seeing as there has to be at least 8 bytes to copy, we can
- * copy one word, and force a user-mode page fault...
- */
-
-.Lcfu_0fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_0nowords
-USER(	TUSER(	ldr)	r3, [r1], #4)
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT	@ On each page, use a ld/st??t instruction
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_0fupi
-/*
- * ip = max no. of bytes to copy before needing another "strt" insn
- */
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #32
-		blt	.Lcfu_0rem8lp
-
-.Lcfu_0cpy8lp:	ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmia	r0!, {r3 - r6}
-		ldmia	r1!, {r3 - r6}			@ Shouldnt fault
-		subs	ip, ip, #32
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_0cpy8lp
-
-.Lcfu_0rem8lp:	cmn	ip, #16
-		ldmgeia	r1!, {r3 - r6}			@ Shouldnt fault
-		stmgeia	r0!, {r3 - r6}
-		tst	ip, #8
-		ldmneia	r1!, {r3 - r4}			@ Shouldnt fault
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-	TUSER(	ldrne) r3, [r1], #4			@ Shouldnt fault
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_0fupi
-.Lcfu_0nowords:	teq	ip, #0
-		beq	.Lcfu_finished
-.Lcfu_nowords:	cmp	ip, #2
-USER(	TUSER(	ldrb)	r3, [r1], #1)			@ May fault
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_not_enough:
-		movs	ip, r2
-		bne	.Lcfu_nowords
-.Lcfu_finished:	mov	r0, #0
-		add	sp, sp, #8
-		ldmfd	sp!, {r4 - r7, pc}
-
-.Lcfu_src_not_aligned:
-		bic	r1, r1, #3
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		cmp	ip, #2
-		bgt	.Lcfu_3fupi
-		beq	.Lcfu_2fupi
-.Lcfu_1fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_1nowords
-		mov	r3, r7, lspull #8
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #24
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_1fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_1rem8lp
-
-.Lcfu_1cpy8lp:	mov	r3, r7, lspull #8
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #24
-		mov	r4, r4, lspull #8
-		orr	r4, r4, r5, lspush #24
-		mov	r5, r5, lspull #8
-		orr	r5, r5, r6, lspush #24
-		mov	r6, r6, lspull #8
-		orr	r6, r6, r7, lspush #24
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_1cpy8lp
-
-.Lcfu_1rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #8
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #24
-		movne	r4, r4, lspull #8
-		orrne	r4, r4, r7, lspush #24
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #8
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #24
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_1fupi
-.Lcfu_1nowords:	mov	r3, r7, get_byte_1
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_2
-		strgeb	r3, [r0], #1
-		movgt	r3, r7, get_byte_3
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_2fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_2nowords
-		mov	r3, r7, lspull #16
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #16
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_2fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_2rem8lp
-
-
-.Lcfu_2cpy8lp:	mov	r3, r7, lspull #16
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		subs	ip, ip, #16
-		orr	r3, r3, r4, lspush #16
-		mov	r4, r4, lspull #16
-		orr	r4, r4, r5, lspush #16
-		mov	r5, r5, lspull #16
-		orr	r5, r5, r6, lspush #16
-		mov	r6, r6, lspull #16
-		orr	r6, r6, r7, lspush #16
-		stmia	r0!, {r3 - r6}
-		bpl	.Lcfu_2cpy8lp
-
-.Lcfu_2rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #16
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #16
-		movne	r4, r4, lspull #16
-		orrne	r4, r4, r7, lspush #16
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #16
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #16
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_2fupi
-.Lcfu_2nowords:	mov	r3, r7, get_byte_2
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-		movge	r3, r7, get_byte_3
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #0)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-
-.Lcfu_3fupi:	subs	r2, r2, #4
-		addmi	ip, r2, #4
-		bmi	.Lcfu_3nowords
-		mov	r3, r7, lspull #24
-USER(	TUSER(	ldr)	r7, [r1], #4)			@ May fault
-		orr	r3, r3, r7, lspush #8
-		str	r3, [r0], #4
-		mov	ip, r1, lsl #32 - PAGE_SHIFT
-		rsb	ip, ip, #0
-		movs	ip, ip, lsr #32 - PAGE_SHIFT
-		beq	.Lcfu_3fupi
-		cmp	r2, ip
-		movlt	ip, r2
-		sub	r2, r2, ip
-		subs	ip, ip, #16
-		blt	.Lcfu_3rem8lp
-
-.Lcfu_3cpy8lp:	mov	r3, r7, lspull #24
-		ldmia	r1!, {r4 - r7}			@ Shouldnt fault
-		orr	r3, r3, r4, lspush #8
-		mov	r4, r4, lspull #24
-		orr	r4, r4, r5, lspush #8
-		mov	r5, r5, lspull #24
-		orr	r5, r5, r6, lspush #8
-		mov	r6, r6, lspull #24
-		orr	r6, r6, r7, lspush #8
-		stmia	r0!, {r3 - r6}
-		subs	ip, ip, #16
-		bpl	.Lcfu_3cpy8lp
-
-.Lcfu_3rem8lp:	tst	ip, #8
-		movne	r3, r7, lspull #24
-		ldmneia	r1!, {r4, r7}			@ Shouldnt fault
-		orrne	r3, r3, r4, lspush #8
-		movne	r4, r4, lspull #24
-		orrne	r4, r4, r7, lspush #8
-		stmneia	r0!, {r3 - r4}
-		tst	ip, #4
-		movne	r3, r7, lspull #24
-USER(	TUSER(	ldrne) r7, [r1], #4)			@ May fault
-		orrne	r3, r3, r7, lspush #8
-		strne	r3, [r0], #4
-		ands	ip, ip, #3
-		beq	.Lcfu_3fupi
-.Lcfu_3nowords:	mov	r3, r7, get_byte_3
-		teq	ip, #0
-		beq	.Lcfu_finished
-		cmp	ip, #2
-		strb	r3, [r0], #1
-USER(	TUSER(	ldrgeb) r3, [r1], #1)			@ May fault
-		strgeb	r3, [r0], #1
-USER(	TUSER(	ldrgtb) r3, [r1], #1)			@ May fault
-		strgtb	r3, [r0], #1
-		b	.Lcfu_finished
-ENDPROC(__copy_from_user)
-
-		.pushsection .fixup,"ax"
-		.align	0
-		/*
-		 * We took an exception.  r0 contains a pointer to
-		 * the byte not copied.
-		 */
-9001:		ldr	r2, [sp], #4			@ void *to
-		sub	r2, r0, r2			@ bytes copied
-		ldr	r1, [sp], #4			@ unsigned long count
-		subs	r4, r1, r2			@ bytes left to copy
-		movne	r1, r4
-		blne	__memzero
-		mov	r0, r4
-		ldmfd	sp!, {r4 - r7, pc}
-		.popsection
-

+ 50 - 0
arch/arm/mach-exynos/firmware.c

@@ -17,6 +17,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cputype.h>
 #include <asm/firmware.h>
+#include <asm/hardware/cache-l2x0.h>
 #include <asm/suspend.h>
 
 #include <mach/map.h>
@@ -136,6 +137,43 @@ static const struct firmware_ops exynos_firmware_ops = {
 	.resume			= IS_ENABLED(CONFIG_EXYNOS_CPU_SUSPEND) ? exynos_resume : NULL,
 };
 
+static void exynos_l2_write_sec(unsigned long val, unsigned reg)
+{
+	static int l2cache_enabled;
+
+	switch (reg) {
+	case L2X0_CTRL:
+		if (val & L2X0_CTRL_EN) {
+			/*
+			 * Before the cache can be enabled, due to firmware
+			 * design, SMC_CMD_L2X0INVALL must be called.
+			 */
+			if (!l2cache_enabled) {
+				exynos_smc(SMC_CMD_L2X0INVALL, 0, 0, 0);
+				l2cache_enabled = 1;
+			}
+		} else {
+			l2cache_enabled = 0;
+		}
+		exynos_smc(SMC_CMD_L2X0CTRL, val, 0, 0);
+		break;
+
+	case L2X0_DEBUG_CTRL:
+		exynos_smc(SMC_CMD_L2X0DEBUG, val, 0, 0);
+		break;
+
+	default:
+		WARN_ONCE(1, "%s: ignoring write to reg 0x%x\n", __func__, reg);
+	}
+}
+
+static void exynos_l2_configure(const struct l2x0_regs *regs)
+{
+	exynos_smc(SMC_CMD_L2X0SETUP1, regs->tag_latency, regs->data_latency,
+		   regs->prefetch_ctrl);
+	exynos_smc(SMC_CMD_L2X0SETUP2, regs->pwr_ctrl, regs->aux_ctrl, 0);
+}
+
 void __init exynos_firmware_init(void)
 {
 	struct device_node *nd;
@@ -155,4 +193,16 @@ void __init exynos_firmware_init(void)
 	pr_info("Running under secure firmware.\n");
 
 	register_firmware_ops(&exynos_firmware_ops);
+
+	/*
+	 * Exynos 4 SoCs (based on Cortex-A9 and equipped with L2C-310),
+	 * running under secure firmware, require certain registers of the
+	 * L2 cache controller to be written in secure mode. The .write_sec
+	 * and .configure callbacks perform the necessary SMC calls.
+	 */
+	if (IS_ENABLED(CONFIG_CACHE_L2X0) &&
+	    read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
+		outer_cache.write_sec = exynos_l2_write_sec;
+		outer_cache.configure = exynos_l2_configure;
+	}
 }

+ 46 - 0
arch/arm/mach-exynos/sleep.S

@@ -16,6 +16,8 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/hardware/cache-l2x0.h>
 #include "smc.h"
 
 #define CPU_MASK	0xff0ffff0
@@ -74,6 +76,45 @@ ENTRY(exynos_cpu_resume_ns)
 	mov	r0, #SMC_CMD_C15RESUME
 	dsb
 	smc	#0
+#ifdef CONFIG_CACHE_L2X0
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
+
+	/* Check that the address has been initialised. */
+	ldr	r1, [r0, #L2X0_R_PHY_BASE]
+	teq	r1, #0
+	beq	skip_l2x0
+
+	/* Skip the L2C setup if the controller is already enabled. */
+	ldr	r2, [r1, #L2X0_CTRL]
+	tst	r2, #0x1
+	bne	skip_l2x0
+
+	ldr	r1, [r0, #L2X0_R_TAG_LATENCY]
+	ldr	r2, [r0, #L2X0_R_DATA_LATENCY]
+	ldr	r3, [r0, #L2X0_R_PREFETCH_CTRL]
+	mov	r0, #SMC_CMD_L2X0SETUP1
+	smc	#0
+
+	/* Reload saved regs pointer because smc corrupts registers. */
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
+
+	ldr	r1, [r0, #L2X0_R_PWR_CTRL]
+	ldr	r2, [r0, #L2X0_R_AUX_CTRL]
+	mov	r0, #SMC_CMD_L2X0SETUP2
+	smc	#0
+
+	mov	r0, #SMC_CMD_L2X0INVALL
+	smc	#0
+
+	mov	r1, #1
+	mov	r0, #SMC_CMD_L2X0CTRL
+	smc	#0
+skip_l2x0:
+#endif /* CONFIG_CACHE_L2X0 */
 skip_cp15:
 	b	cpu_resume
 ENDPROC(exynos_cpu_resume_ns)
@@ -83,3 +124,8 @@ cp15_save_diag:
 	.globl cp15_save_power
 cp15_save_power:
 	.long	0	@ cp15 power control
+
+#ifdef CONFIG_CACHE_L2X0
+	.align
+1:	.long	l2x0_saved_regs - .
+#endif /* CONFIG_CACHE_L2X0 */

+ 6 - 0
arch/arm/mach-omap2/board-generic.c

@@ -171,6 +171,9 @@ static const char *const omap4_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)")
+	.l2c_aux_val	= OMAP_L2C_AUX_CTRL,
+	.l2c_aux_mask	= 0xcf9fffff,
+	.l2c_write_sec	= omap4_l2c310_write_sec,
 	.reserve	= omap_reserve,
 	.smp		= smp_ops(omap4_smp_ops),
 	.map_io		= omap4_map_io,
@@ -214,6 +217,9 @@ static const char *const am43_boards_compat[] __initconst = {
 };
 
 DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)")
+	.l2c_aux_val	= OMAP_L2C_AUX_CTRL,
+	.l2c_aux_mask	= 0xcf9fffff,
+	.l2c_write_sec	= omap4_l2c310_write_sec,
 	.map_io		= am33xx_map_io,
 	.init_early	= am43xx_init_early,
 	.init_late	= am43xx_init_late,

+ 8 - 0
arch/arm/mach-omap2/common.h

@@ -35,6 +35,7 @@
 #include <linux/irqchip/irq-omap-intc.h>
 
 #include <asm/proc-fns.h>
+#include <asm/hardware/cache-l2x0.h>
 
 #include "i2c.h"
 #include "serial.h"
@@ -94,11 +95,18 @@ extern void omap3_gptimer_timer_init(void);
 extern void omap4_local_timer_init(void);
 #ifdef CONFIG_CACHE_L2X0
 int omap_l2_cache_init(void);
+#define OMAP_L2C_AUX_CTRL	(L2C_AUX_CTRL_SHARED_OVERRIDE | \
+				 L310_AUX_CTRL_DATA_PREFETCH | \
+				 L310_AUX_CTRL_INSTR_PREFETCH)
+void omap4_l2c310_write_sec(unsigned long val, unsigned reg);
 #else
 static inline int omap_l2_cache_init(void)
 {
 	return 0;
 }
+
+#define OMAP_L2C_AUX_CTRL	0
+#define omap4_l2c310_write_sec	NULL
 #endif
 extern void omap5_realtime_timer_init(void);
 

+ 1 - 15
arch/arm/mach-omap2/omap4-common.c

@@ -166,7 +166,7 @@ void __iomem *omap4_get_l2cache_base(void)
 	return l2cache_base;
 }
 
-static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
+void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
 {
 	unsigned smc_op;
 
@@ -201,24 +201,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg)
 
 int __init omap_l2_cache_init(void)
 {
-	u32 aux_ctrl;
-
 	/* Static mapping, never released */
 	l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K);
 	if (WARN_ON(!l2cache_base))
 		return -ENOMEM;
-
-	/* 16-way associativity, parity disabled, way size - 64KB (es2.0 +) */
-	aux_ctrl = L2C_AUX_CTRL_SHARED_OVERRIDE |
-		   L310_AUX_CTRL_DATA_PREFETCH |
-		   L310_AUX_CTRL_INSTR_PREFETCH;
-
-	outer_cache.write_sec = omap4_l2c310_write_sec;
-	if (of_have_populated_dt())
-		l2x0_of_init(aux_ctrl, 0xcf9fffff);
-	else
-		l2x0_init(l2cache_base, aux_ctrl, 0xcf9fffff);
-
 	return 0;
 }
 #endif

+ 2 - 2
arch/arm/mach-qcom/platsmp.c

@@ -44,7 +44,7 @@
 #define APCS_SAW2_VCTL		0x14
 #define APCS_SAW2_2_VCTL	0x1c
 
-extern void secondary_startup(void);
+extern void secondary_startup_arm(void);
 
 static DEFINE_SPINLOCK(boot_lock);
 
@@ -337,7 +337,7 @@ static void __init qcom_smp_prepare_cpus(unsigned int max_cpus)
 		flags |= cold_boot_flags[map];
 	}
 
-	if (scm_set_boot_addr(virt_to_phys(secondary_startup), flags)) {
+	if (scm_set_boot_addr(virt_to_phys(secondary_startup_arm), flags)) {
 		for_each_present_cpu(cpu) {
 			if (cpu == smp_processor_id())
 				continue;

+ 1 - 1
arch/arm/mach-sa1100/Makefile

@@ -3,7 +3,7 @@
 #
 
 # Common support
-obj-y := clock.o generic.o irq.o time.o #nmi-oopser.o
+obj-y := clock.o generic.o irq.o #nmi-oopser.o
 
 # Specific board support
 obj-$(CONFIG_SA1100_ASSABET)		+= assabet.o

+ 12 - 0
arch/arm/mach-sa1100/clock.c

@@ -119,6 +119,17 @@ static DEFINE_CLK(gpio27, &clk_gpio27_ops);
 
 static DEFINE_CLK(cpu, &clk_cpu_ops);
 
+static unsigned long clk_36864_get_rate(struct clk *clk)
+{
+	return 3686400;
+}
+
+static struct clkops clk_36864_ops = {
+	.get_rate	= clk_36864_get_rate,
+};
+
+static DEFINE_CLK(36864, &clk_36864_ops);
+
 static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa1111.0", NULL, &clk_gpio27),
 	CLKDEV_INIT("sa1100-rtc", NULL, NULL),
@@ -126,6 +137,7 @@ static struct clk_lookup sa11xx_clkregs[] = {
 	CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu),
 	/* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */
 	CLKDEV_INIT("1800", NULL, &clk_cpu),
+	CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
 };
 
 static int __init sa11xx_clk_init(void)

+ 1 - 2
arch/arm/mach-sa1100/collie.c

@@ -371,8 +371,7 @@ static void __init collie_init(void)
 		PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS |
 		PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM;
 
-	PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY |
-		_COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC;
+	PWER = 0;
 
 	PGSR = _COLLIE_GPIO_nREMOCON_ON;
 

+ 6 - 0
arch/arm/mach-sa1100/generic.c

@@ -33,6 +33,7 @@
 #include <mach/irqs.h>
 
 #include "generic.h"
+#include <clocksource/pxa.h>
 
 unsigned int reset_status;
 EXPORT_SYMBOL(reset_status);
@@ -369,6 +370,11 @@ void __init sa1100_map_io(void)
 	iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc));
 }
 
+void __init sa1100_timer_init(void)
+{
+	pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400);
+}
+
 /*
  * Disable the memory bus request/grant signals on the SA1110 to
  * ensure that we don't receive spurious memory requests.  We set

+ 42 - 31
arch/arm/mach-sa1100/include/mach/irqs.h

@@ -8,17 +8,17 @@
  * 2001/11/14	RMK	Cleaned up and standardised a lot of the IRQs.
  */
 
-#define	IRQ_GPIO0		1
-#define	IRQ_GPIO1		2
-#define	IRQ_GPIO2		3
-#define	IRQ_GPIO3		4
-#define	IRQ_GPIO4		5
-#define	IRQ_GPIO5		6
-#define	IRQ_GPIO6		7
-#define	IRQ_GPIO7		8
-#define	IRQ_GPIO8		9
-#define	IRQ_GPIO9		10
-#define	IRQ_GPIO10		11
+#define	IRQ_GPIO0_SC		1
+#define	IRQ_GPIO1_SC		2
+#define	IRQ_GPIO2_SC		3
+#define	IRQ_GPIO3_SC		4
+#define	IRQ_GPIO4_SC		5
+#define	IRQ_GPIO5_SC		6
+#define	IRQ_GPIO6_SC		7
+#define	IRQ_GPIO7_SC		8
+#define	IRQ_GPIO8_SC		9
+#define	IRQ_GPIO9_SC		10
+#define	IRQ_GPIO10_SC		11
 #define	IRQ_GPIO11_27		12
 #define	IRQ_LCD			13	/* LCD controller           */
 #define	IRQ_Ser0UDC		14	/* Ser. port 0 UDC          */
@@ -41,32 +41,43 @@
 #define	IRQ_RTC1Hz		31	/* RTC 1 Hz clock           */
 #define	IRQ_RTCAlrm		32	/* RTC Alarm                */
 
-#define	IRQ_GPIO11		33
-#define	IRQ_GPIO12		34
-#define	IRQ_GPIO13		35
-#define	IRQ_GPIO14		36
-#define	IRQ_GPIO15		37
-#define	IRQ_GPIO16		38
-#define	IRQ_GPIO17		39
-#define	IRQ_GPIO18		40
-#define	IRQ_GPIO19		41
-#define	IRQ_GPIO20		42
-#define	IRQ_GPIO21		43
-#define	IRQ_GPIO22		44
-#define	IRQ_GPIO23		45
-#define	IRQ_GPIO24		46
-#define	IRQ_GPIO25		47
-#define	IRQ_GPIO26		48
-#define	IRQ_GPIO27		49
+#define	IRQ_GPIO0		33
+#define	IRQ_GPIO1		34
+#define	IRQ_GPIO2		35
+#define	IRQ_GPIO3		36
+#define	IRQ_GPIO4		37
+#define	IRQ_GPIO5		38
+#define	IRQ_GPIO6		39
+#define	IRQ_GPIO7		40
+#define	IRQ_GPIO8		41
+#define	IRQ_GPIO9		42
+#define	IRQ_GPIO10		43
+#define	IRQ_GPIO11		44
+#define	IRQ_GPIO12		45
+#define	IRQ_GPIO13		46
+#define	IRQ_GPIO14		47
+#define	IRQ_GPIO15		48
+#define	IRQ_GPIO16		49
+#define	IRQ_GPIO17		50
+#define	IRQ_GPIO18		51
+#define	IRQ_GPIO19		52
+#define	IRQ_GPIO20		53
+#define	IRQ_GPIO21		54
+#define	IRQ_GPIO22		55
+#define	IRQ_GPIO23		56
+#define	IRQ_GPIO24		57
+#define	IRQ_GPIO25		58
+#define	IRQ_GPIO26		59
+#define	IRQ_GPIO27		60
 
 /*
  * The next 16 interrupts are for board specific purposes.  Since
  * the kernel can only run on one machine at a time, we can re-use
  * these.  If you need more, increase IRQ_BOARD_END, but keep it
- * within sensible limits.  IRQs 49 to 64 are available.
+ * within sensible limits.  IRQs 61 to 76 are available.
  */
-#define IRQ_BOARD_START		50
-#define IRQ_BOARD_END		66
+#define IRQ_BOARD_START		61
+#define IRQ_BOARD_END		77
 
 /*
  * Figure out the MAX IRQ number.

+ 4 - 199
arch/arm/mach-sa1100/irq.c

@@ -80,170 +80,6 @@ static struct irq_domain_ops sa1100_normal_irqdomain_ops = {
 
 static struct irq_domain *sa1100_normal_irqdomain;
 
-/*
- * SA1100 GPIO edge detection for IRQs:
- * IRQs are generated on Falling-Edge, Rising-Edge, or both.
- * Use this instead of directly setting GRER/GFER.
- */
-static int GPIO_IRQ_rising_edge;
-static int GPIO_IRQ_falling_edge;
-static int GPIO_IRQ_mask = (1 << 11) - 1;
-
-static int sa1100_gpio_type(struct irq_data *d, unsigned int type)
-{
-	unsigned int mask;
-
-	mask = BIT(d->hwirq);
-
-	if (type == IRQ_TYPE_PROBE) {
-		if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
-			return 0;
-		type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
-	}
-
-	if (type & IRQ_TYPE_EDGE_RISING) {
-		GPIO_IRQ_rising_edge |= mask;
-	} else
-		GPIO_IRQ_rising_edge &= ~mask;
-	if (type & IRQ_TYPE_EDGE_FALLING) {
-		GPIO_IRQ_falling_edge |= mask;
-	} else
-		GPIO_IRQ_falling_edge &= ~mask;
-
-	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-
-	return 0;
-}
-
-/*
- * GPIO IRQs must be acknowledged.
- */
-static void sa1100_gpio_ack(struct irq_data *d)
-{
-	GEDR = BIT(d->hwirq);
-}
-
-static int sa1100_gpio_wake(struct irq_data *d, unsigned int on)
-{
-	if (on)
-		PWER |= BIT(d->hwirq);
-	else
-		PWER &= ~BIT(d->hwirq);
-	return 0;
-}
-
-/*
- * This is for IRQs from 0 to 10.
- */
-static struct irq_chip sa1100_low_gpio_chip = {
-	.name		= "GPIO-l",
-	.irq_ack	= sa1100_gpio_ack,
-	.irq_mask	= sa1100_mask_irq,
-	.irq_unmask	= sa1100_unmask_irq,
-	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_gpio_wake,
-};
-
-static int sa1100_low_gpio_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip,
-				 handle_edge_irq);
-	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-
-	return 0;
-}
-
-static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = {
-	.map = sa1100_low_gpio_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_low_gpio_irqdomain;
-
-/*
- * IRQ11 (GPIO11 through 27) handler.  We enter here with the
- * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
- * and call the handler.
- */
-static void
-sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc)
-{
-	unsigned int mask;
-
-	mask = GEDR & 0xfffff800;
-	do {
-		/*
-		 * clear down all currently active IRQ sources.
-		 * We will be processing them all.
-		 */
-		GEDR = mask;
-
-		irq = IRQ_GPIO11;
-		mask >>= 11;
-		do {
-			if (mask & 1)
-				generic_handle_irq(irq);
-			mask >>= 1;
-			irq++;
-		} while (mask);
-
-		mask = GEDR & 0xfffff800;
-	} while (mask);
-}
-
-/*
- * Like GPIO0 to 10, GPIO11-27 IRQs need to be handled specially.
- * In addition, the IRQs are all collected up into one bit in the
- * interrupt controller registers.
- */
-static void sa1100_high_gpio_mask(struct irq_data *d)
-{
-	unsigned int mask = BIT(d->hwirq);
-
-	GPIO_IRQ_mask &= ~mask;
-
-	GRER &= ~mask;
-	GFER &= ~mask;
-}
-
-static void sa1100_high_gpio_unmask(struct irq_data *d)
-{
-	unsigned int mask = BIT(d->hwirq);
-
-	GPIO_IRQ_mask |= mask;
-
-	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-}
-
-static struct irq_chip sa1100_high_gpio_chip = {
-	.name		= "GPIO-h",
-	.irq_ack	= sa1100_gpio_ack,
-	.irq_mask	= sa1100_high_gpio_mask,
-	.irq_unmask	= sa1100_high_gpio_unmask,
-	.irq_set_type	= sa1100_gpio_type,
-	.irq_set_wake	= sa1100_gpio_wake,
-};
-
-static int sa1100_high_gpio_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip,
-				 handle_edge_irq);
-	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
-
-	return 0;
-}
-
-static struct irq_domain_ops sa1100_high_gpio_irqdomain_ops = {
-	.map = sa1100_high_gpio_irqdomain_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-static struct irq_domain *sa1100_high_gpio_irqdomain;
-
 static struct resource irq_resource =
 	DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs");
 
@@ -270,17 +106,6 @@ static int sa1100irq_suspend(void)
 		  IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2|
 		  IC_GPIO1|IC_GPIO0);
 
-	/*
-	 * Set the appropriate edges for wakeup.
-	 */
-	GRER = PWER & GPIO_IRQ_rising_edge;
-	GFER = PWER & GPIO_IRQ_falling_edge;
-	
-	/*
-	 * Clear any pending GPIO interrupts.
-	 */
-	GEDR = GEDR;
-
 	return 0;
 }
 
@@ -292,9 +117,6 @@ static void sa1100irq_resume(void)
 		ICCR = st->iccr;
 		ICLR = st->iclr;
 
-		GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
-		GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
-
 		ICMR = st->icmr;
 	}
 }
@@ -325,7 +147,8 @@ sa1100_handle_irq(struct pt_regs *regs)
 		if (mask == 0)
 			break;
 
-		handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0, regs);
+		handle_domain_irq(sa1100_normal_irqdomain,
+				ffs(mask) - 1, regs);
 	} while (1);
 }
 
@@ -339,34 +162,16 @@ void __init sa1100_init_irq(void)
 	/* all IRQs are IRQ, not FIQ */
 	ICLR = 0;
 
-	/* clear all GPIO edge detects */
-	GFER = 0;
-	GRER = 0;
-	GEDR = -1;
-
 	/*
 	 * Whatever the doc says, this has to be set for the wait-on-irq
 	 * instruction to work... on a SA1100 rev 9 at least.
 	 */
 	ICCR = 1;
 
-	sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL,
-			11, IRQ_GPIO0, 0,
-			&sa1100_low_gpio_irqdomain_ops, NULL);
-
-	sa1100_normal_irqdomain = irq_domain_add_legacy(NULL,
-			21, IRQ_GPIO11_27, 11,
+	sa1100_normal_irqdomain = irq_domain_add_simple(NULL,
+			32, IRQ_GPIO0_SC,
 			&sa1100_normal_irqdomain_ops, NULL);
 
-	sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL,
-			17, IRQ_GPIO11, 11,
-			&sa1100_high_gpio_irqdomain_ops, NULL);
-
-	/*
-	 * Install handler for GPIO 11-27 edge detect interrupts
-	 */
-	irq_set_chained_handler(IRQ_GPIO11_27, sa1100_high_gpio_handler);
-
 	set_handle_irq(sa1100_handle_irq);
 
 	sa1100_init_gpio();

+ 1 - 0
arch/arm/mach-sa1100/pm.c

@@ -81,6 +81,7 @@ static int sa11x0_pm_enter(suspend_state_t state)
 	/*
 	 * Ensure not to come back here if it wasn't intended
 	 */
+	RCSR = RCSR_SMR;
 	PSPR = 0;
 
 	/*

+ 0 - 139
arch/arm/mach-sa1100/time.c

@@ -1,139 +0,0 @@
-/*
- * linux/arch/arm/mach-sa1100/time.c
- *
- * Copyright (C) 1998 Deborah Wallach.
- * Twiddles  (C) 1999 Hugo Fiennes <hugo@empeg.com>
- *
- * 2000/03/29 (C) Nicolas Pitre <nico@fluxnic.net>
- *	Rewritten: big cleanup, much simpler, better HZ accuracy.
- *
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/timex.h>
-#include <linux/clockchips.h>
-#include <linux/sched_clock.h>
-
-#include <asm/mach/time.h>
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-
-#define SA1100_CLOCK_FREQ 3686400
-#define SA1100_LATCH DIV_ROUND_CLOSEST(SA1100_CLOCK_FREQ, HZ)
-
-static u64 notrace sa1100_read_sched_clock(void)
-{
-	return readl_relaxed(OSCR);
-}
-
-#define MIN_OSCR_DELTA 2
-
-static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
-{
-	struct clock_event_device *c = dev_id;
-
-	/* Disarm the compare/match, signal the event. */
-	writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
-	writel_relaxed(OSSR_M0, OSSR);
-	c->event_handler(c);
-
-	return IRQ_HANDLED;
-}
-
-static int
-sa1100_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c)
-{
-	unsigned long next, oscr;
-
-	writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER);
-	next = readl_relaxed(OSCR) + delta;
-	writel_relaxed(next, OSMR0);
-	oscr = readl_relaxed(OSCR);
-
-	return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0;
-}
-
-static void
-sa1100_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *c)
-{
-	switch (mode) {
-	case CLOCK_EVT_MODE_ONESHOT:
-	case CLOCK_EVT_MODE_UNUSED:
-	case CLOCK_EVT_MODE_SHUTDOWN:
-		writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER);
-		writel_relaxed(OSSR_M0, OSSR);
-		break;
-
-	case CLOCK_EVT_MODE_RESUME:
-	case CLOCK_EVT_MODE_PERIODIC:
-		break;
-	}
-}
-
-#ifdef CONFIG_PM
-unsigned long osmr[4], oier;
-
-static void sa1100_timer_suspend(struct clock_event_device *cedev)
-{
-	osmr[0] = readl_relaxed(OSMR0);
-	osmr[1] = readl_relaxed(OSMR1);
-	osmr[2] = readl_relaxed(OSMR2);
-	osmr[3] = readl_relaxed(OSMR3);
-	oier = readl_relaxed(OIER);
-}
-
-static void sa1100_timer_resume(struct clock_event_device *cedev)
-{
-	writel_relaxed(0x0f, OSSR);
-	writel_relaxed(osmr[0], OSMR0);
-	writel_relaxed(osmr[1], OSMR1);
-	writel_relaxed(osmr[2], OSMR2);
-	writel_relaxed(osmr[3], OSMR3);
-	writel_relaxed(oier, OIER);
-
-	/*
-	 * OSMR0 is the system timer: make sure OSCR is sufficiently behind
-	 */
-	writel_relaxed(OSMR0 - SA1100_LATCH, OSCR);
-}
-#else
-#define sa1100_timer_suspend NULL
-#define sa1100_timer_resume NULL
-#endif
-
-static struct clock_event_device ckevt_sa1100_osmr0 = {
-	.name		= "osmr0",
-	.features	= CLOCK_EVT_FEAT_ONESHOT,
-	.rating		= 200,
-	.set_next_event	= sa1100_osmr0_set_next_event,
-	.set_mode	= sa1100_osmr0_set_mode,
-	.suspend	= sa1100_timer_suspend,
-	.resume		= sa1100_timer_resume,
-};
-
-static struct irqaction sa1100_timer_irq = {
-	.name		= "ost0",
-	.flags		= IRQF_TIMER | IRQF_IRQPOLL,
-	.handler	= sa1100_ost0_interrupt,
-	.dev_id		= &ckevt_sa1100_osmr0,
-};
-
-void __init sa1100_timer_init(void)
-{
-	writel_relaxed(0, OIER);
-	writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR);
-
-	sched_clock_register(sa1100_read_sched_clock, 32, 3686400);
-
-	ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
-
-	setup_irq(IRQ_OST0, &sa1100_timer_irq);
-
-	clocksource_mmio_init(OSCR, "oscr", SA1100_CLOCK_FREQ, 200, 32,
-		clocksource_mmio_readl_up);
-	clockevents_config_and_register(&ckevt_sa1100_osmr0, 3686400,
-					MIN_OSCR_DELTA * 2, 0x7fffffff);
-}

+ 1 - 0
arch/arm/mm/Kconfig

@@ -1012,6 +1012,7 @@ config ARCH_SUPPORTS_BIG_ENDIAN
 
 config ARM_KERNMEM_PERMS
 	bool "Restrict kernel memory permissions"
+	depends on MMU
 	help
 	  If this is set, kernel memory other than kernel text (and rodata)
 	  will be made non-executable. The tradeoff is that each region is

+ 230 - 209
arch/arm/mm/cache-l2x0.c

@@ -41,12 +41,14 @@ struct l2c_init_data {
 	void (*enable)(void __iomem *, u32, unsigned);
 	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
 	void (*save)(void __iomem *);
+	void (*configure)(void __iomem *);
 	struct outer_cache_fns outer_cache;
 };
 
 #define CACHE_LINE_SIZE		32
 
 static void __iomem *l2x0_base;
+static const struct l2c_init_data *l2x0_data;
 static DEFINE_RAW_SPINLOCK(l2x0_lock);
 static u32 l2x0_way_mask;	/* Bitmask of active ways */
 static u32 l2x0_size;
@@ -106,6 +108,19 @@ static inline void l2c_unlock(void __iomem *base, unsigned num)
 	}
 }
 
+static void l2c_configure(void __iomem *base)
+{
+	if (outer_cache.configure) {
+		outer_cache.configure(&l2x0_saved_regs);
+		return;
+	}
+
+	if (l2x0_data->configure)
+		l2x0_data->configure(base);
+
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+}
+
 /*
  * Enable the L2 cache controller.  This function must only be
  * called when the cache controller is known to be disabled.
@@ -114,7 +129,12 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock)
 {
 	unsigned long flags;
 
-	l2c_write_sec(aux, base, L2X0_AUX_CTRL);
+	/* Do not touch the controller if already enabled. */
+	if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)
+		return;
+
+	l2x0_saved_regs.aux_ctrl = aux;
+	l2c_configure(base);
 
 	l2c_unlock(base, num_lock);
 
@@ -136,76 +156,14 @@ static void l2c_disable(void)
 	dsb(st);
 }
 
-#ifdef CONFIG_CACHE_PL310
-static inline void cache_wait(void __iomem *reg, unsigned long mask)
-{
-	/* cache operations by line are atomic on PL310 */
-}
-#else
-#define cache_wait	l2c_wait_mask
-#endif
-
-static inline void cache_sync(void)
-{
-	void __iomem *base = l2x0_base;
-
-	writel_relaxed(0, base + sync_reg_offset);
-	cache_wait(base + L2X0_CACHE_SYNC, 1);
-}
-
-#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915)
-static inline void debug_writel(unsigned long val)
-{
-	l2c_set_debug(l2x0_base, val);
-}
-#else
-/* Optimised out for non-errata case */
-static inline void debug_writel(unsigned long val)
-{
-}
-#endif
-
-static void l2x0_cache_sync(void)
-{
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	cache_sync();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void __l2x0_flush_all(void)
-{
-	debug_writel(0x03);
-	__l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY);
-	cache_sync();
-	debug_writel(0x00);
-}
-
-static void l2x0_flush_all(void)
-{
-	unsigned long flags;
-
-	/* clean all ways */
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-}
-
-static void l2x0_disable(void)
+static void l2c_save(void __iomem *base)
 {
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	__l2x0_flush_all();
-	l2c_write_sec(0, l2x0_base, L2X0_CTRL);
-	dsb(st);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
 }
 
-static void l2c_save(void __iomem *base)
+static void l2c_resume(void)
 {
-	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock);
 }
 
 /*
@@ -288,14 +246,6 @@ static void l2c210_sync(void)
 	__l2c210_cache_sync(l2x0_base);
 }
 
-static void l2c210_resume(void)
-{
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1);
-}
-
 static const struct l2c_init_data l2c210_data __initconst = {
 	.type = "L2C-210",
 	.way_size_0 = SZ_8K,
@@ -309,7 +259,7 @@ static const struct l2c_init_data l2c210_data __initconst = {
 		.flush_all = l2c210_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c210_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -466,7 +416,7 @@ static const struct l2c_init_data l2c220_data = {
 		.flush_all = l2c220_flush_all,
 		.disable = l2c_disable,
 		.sync = l2c220_sync,
-		.resume = l2c210_resume,
+		.resume = l2c_resume,
 	},
 };
 
@@ -615,39 +565,29 @@ static void __init l2c310_save(void __iomem *base)
 							L310_POWER_CTRL);
 }
 
-static void l2c310_resume(void)
+static void l2c310_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
+	unsigned revision;
 
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		unsigned revision;
-
-		/* restore pl310 setup */
-		writel_relaxed(l2x0_saved_regs.tag_latency,
-			       base + L310_TAG_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.data_latency,
-			       base + L310_DATA_LATENCY_CTRL);
-		writel_relaxed(l2x0_saved_regs.filter_end,
-			       base + L310_ADDR_FILTER_END);
-		writel_relaxed(l2x0_saved_regs.filter_start,
-			       base + L310_ADDR_FILTER_START);
-
-		revision = readl_relaxed(base + L2X0_CACHE_ID) &
-				L2X0_CACHE_ID_RTL_MASK;
-
-		if (revision >= L310_CACHE_ID_RTL_R2P0)
-			l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
-				      L310_PREFETCH_CTRL);
-		if (revision >= L310_CACHE_ID_RTL_R3P0)
-			l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
-				      L310_POWER_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-
-		/* Re-enable full-line-of-zeros for Cortex-A9 */
-		if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
-			set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
-	}
+	/* program the saved pl310 setup */
+	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+		      L310_TAG_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.data_latency, base,
+		      L310_DATA_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.filter_end, base,
+		      L310_ADDR_FILTER_END);
+	l2c_write_sec(l2x0_saved_regs.filter_start, base,
+		      L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+				 L2X0_CACHE_ID_RTL_MASK;
+
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+			      L310_PREFETCH_CTRL);
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+			      L310_POWER_CTRL);
 }
 
 static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data)
@@ -699,6 +639,23 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
 	}
 
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN |
+						L310_STNDBY_MODE_EN;
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, aux, num_lock);
+
+	/* Read back resulting AUX_CTRL value as it could have been altered. */
+	aux = readl_relaxed(base + L2X0_AUX_CTRL);
+
 	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
 		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
 
@@ -712,23 +669,12 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock)
 	if (rev >= L310_CACHE_ID_RTL_R3P0) {
 		u32 power_ctrl;
 
-		l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN,
-			      base, L310_POWER_CTRL);
 		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
 		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
 			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
 			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
 	}
 
-	/*
-	 * Always enable non-secure access to the lockdown registers -
-	 * we write to them as part of the L2C enable sequence so they
-	 * need to be accessible.
-	 */
-	aux |= L310_AUX_CTRL_NS_LOCKDOWN;
-
-	l2c_enable(base, aux, num_lock);
-
 	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) {
 		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
 		cpu_notifier(l2c310_cpu_enable_flz, 0);
@@ -760,11 +706,11 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
 
 	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
 	    revision < L310_CACHE_ID_RTL_R3P2) {
-		u32 val = readl_relaxed(base + L310_PREFETCH_CTRL);
+		u32 val = l2x0_saved_regs.prefetch_ctrl;
 		/* I don't think bit23 is required here... but iMX6 does so */
 		if (val & (BIT(30) | BIT(23))) {
 			val &= ~(BIT(30) | BIT(23));
-			l2c_write_sec(val, base, L310_PREFETCH_CTRL);
+			l2x0_saved_regs.prefetch_ctrl = val;
 			errata[n++] = "752271";
 		}
 	}
@@ -800,6 +746,15 @@ static void l2c310_disable(void)
 	l2c_disable();
 }
 
+static void l2c310_resume(void)
+{
+	l2c_resume();
+
+	/* Re-enable full-line-of-zeros for Cortex-A9 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+}
+
 static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.type = "L2C-310",
 	.way_size_0 = SZ_8K,
@@ -807,6 +762,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -818,13 +774,21 @@ static const struct l2c_init_data l2c310_init_fns __initconst = {
 	},
 };
 
-static void __init __l2c_init(const struct l2c_init_data *data,
-	u32 aux_val, u32 aux_mask, u32 cache_id)
+static int __init __l2c_init(const struct l2c_init_data *data,
+			     u32 aux_val, u32 aux_mask, u32 cache_id)
 {
 	struct outer_cache_fns fns;
 	unsigned way_size_bits, ways;
 	u32 aux, old_aux;
 
+	/*
+	 * Save a copy of the init data globally so that callbacks which do
+	 * not receive context from callers can access the structure.
+	 */
+	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!l2x0_data)
+		return -ENOMEM;
+
 	/*
 	 * Sanity check the aux values.  aux_mask is the bits we preserve
 	 * from reading the hardware register, and aux_val is the bits we
@@ -884,6 +848,7 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 
 	fns = data->outer_cache;
 	fns.write_sec = outer_cache.write_sec;
+	fns.configure = outer_cache.configure;
 	if (data->fixup)
 		data->fixup(l2x0_base, cache_id, &fns);
 
@@ -910,6 +875,8 @@ static void __init __l2c_init(const struct l2c_init_data *data,
 		data->type, ways, l2x0_size >> 10);
 	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
 		data->type, cache_id, aux);
+
+	return 0;
 }
 
 void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
@@ -936,6 +903,10 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
 		break;
 	}
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	__l2c_init(data, aux_val, aux_mask, cache_id);
 }
 
@@ -1102,7 +1073,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = {
 		.flush_all   = l2c210_flush_all,
 		.disable     = l2c_disable,
 		.sync        = l2c210_sync,
-		.resume      = l2c210_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1120,7 +1091,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = {
 		.flush_all   = l2c220_flush_all,
 		.disable     = l2c_disable,
 		.sync        = l2c220_sync,
-		.resume      = l2c210_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1131,32 +1102,32 @@ static void __init l2c310_of_parse(const struct device_node *np,
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
 	u32 assoc;
+	u32 prefetch;
+	u32 val;
 	int ret;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
-		writel_relaxed(
+		l2x0_saved_regs.tag_latency =
 			L310_LATENCY_CTRL_RD(tag[0] - 1) |
 			L310_LATENCY_CTRL_WR(tag[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(tag[2] - 1),
-			l2x0_base + L310_TAG_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1);
 
 	of_property_read_u32_array(np, "arm,data-latency",
 				   data, ARRAY_SIZE(data));
 	if (data[0] && data[1] && data[2])
-		writel_relaxed(
+		l2x0_saved_regs.data_latency =
 			L310_LATENCY_CTRL_RD(data[0] - 1) |
 			L310_LATENCY_CTRL_WR(data[1] - 1) |
-			L310_LATENCY_CTRL_SETUP(data[2] - 1),
-			l2x0_base + L310_DATA_LATENCY_CTRL);
+			L310_LATENCY_CTRL_SETUP(data[2] - 1);
 
 	of_property_read_u32_array(np, "arm,filter-ranges",
 				   filter, ARRAY_SIZE(filter));
 	if (filter[1]) {
-		writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M),
-			       l2x0_base + L310_ADDR_FILTER_END);
-		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
-			       l2x0_base + L310_ADDR_FILTER_START);
+		l2x0_saved_regs.filter_end =
+					ALIGN(filter[0] + filter[1], SZ_1M);
+		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
+					| L310_ADDR_FILTER_EN;
 	}
 
 	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
@@ -1178,6 +1149,58 @@ static void __init l2c310_of_parse(const struct device_node *np,
 		       assoc);
 		break;
 	}
+
+	prefetch = l2x0_saved_regs.prefetch_ctrl;
+
+	ret = of_property_read_u32(np, "arm,double-linefill", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
+	if (ret == 0) {
+		if (!val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
+	if (ret == 0) {
+		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
+		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
+	}
+
+	l2x0_saved_regs.prefetch_ctrl = prefetch;
 }
 
 static const struct l2c_init_data of_l2c310_data __initconst = {
@@ -1188,6 +1211,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1216,6 +1240,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
 	.enable = l2c310_enable,
 	.fixup = l2c310_fixup,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = l2c210_inv_range,
 		.clean_range = l2c210_clean_range,
@@ -1231,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
  * noninclusive, while the hardware cache range operations use
  * inclusive start and end addresses.
  */
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
 {
 	/*
 	 * Limit the number of cache lines processed at once,
@@ -1250,25 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return end;
 }
 
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
 static void aurora_pa_range(unsigned long start, unsigned long end,
-			unsigned long offset)
+			    unsigned long offset)
 {
+	void __iomem *base = l2x0_base;
+	unsigned long range_end;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, l2x0_base + offset);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
-	cache_sync();
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
 	/*
 	 * round start and end adresses up to cache line size
 	 */
@@ -1276,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
 	end = ALIGN(end, CACHE_LINE_SIZE);
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * perform operation on all full cache lines between 'start' and 'end'
 	 */
 	while (start < end) {
-		unsigned long range_end = calc_range_end(start, end);
-		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-				AURORA_INVAL_RANGE_REG);
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
 		start = range_end;
 	}
 }
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
 
 static void aurora_clean_range(unsigned long start, unsigned long end)
 {
@@ -1292,52 +1314,53 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
 	 * If L2 is forced to WT, the L2 will always be clean and we
 	 * don't need to do anything here.
 	 */
-	if (!l2_wt_override) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		end = ALIGN(end, CACHE_LINE_SIZE);
-		while (start != end) {
-			unsigned long range_end = calc_range_end(start, end);
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_CLEAN_RANGE_REG);
-			start = range_end;
-		}
-	}
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
 static void aurora_flush_range(unsigned long start, unsigned long end)
 {
-	start &= ~(CACHE_LINE_SIZE - 1);
-	end = ALIGN(end, CACHE_LINE_SIZE);
-	while (start != end) {
-		unsigned long range_end = calc_range_end(start, end);
-		/*
-		 * If L2 is forced to WT, the L2 will always be clean and we
-		 * just need to invalidate.
-		 */
-		if (l2_wt_override)
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-							AURORA_INVAL_RANGE_REG);
-		else
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-							AURORA_FLUSH_RANGE_REG);
-		start = range_end;
-	}
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
 }
 
-static void aurora_save(void __iomem *base)
+static void aurora_flush_all(void)
 {
-	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
-	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	/* clean and invalidate all ways */
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	writel_relaxed(0, base + AURORA_SYNC_REG);
 }
 
-static void aurora_resume(void)
+static void aurora_cache_sync(void)
+{
+	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+}
+
+static void aurora_disable(void)
 {
 	void __iomem *base = l2x0_base;
+	unsigned long flags;
 
-	if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL);
-		writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL);
-	}
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void aurora_save(void __iomem *base)
+{
+	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
 }
 
 /*
@@ -1398,10 +1421,10 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
 		.inv_range   = aurora_inv_range,
 		.clean_range = aurora_clean_range,
 		.flush_range = aurora_flush_range,
-		.flush_all   = l2x0_flush_all,
-		.disable     = l2x0_disable,
-		.sync        = l2x0_cache_sync,
-		.resume      = aurora_resume,
+		.flush_all   = aurora_flush_all,
+		.disable     = aurora_disable,
+		.sync	     = aurora_cache_sync,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1414,7 +1437,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
 	.fixup = aurora_fixup,
 	.save  = aurora_save,
 	.outer_cache = {
-		.resume      = aurora_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1562,6 +1585,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
 	.of_parse = l2c310_of_parse,
 	.enable = l2c310_enable,
 	.save  = l2c310_save,
+	.configure = l2c310_configure,
 	.outer_cache = {
 		.inv_range   = bcm_inv_range,
 		.clean_range = bcm_clean_range,
@@ -1583,18 +1607,12 @@ static void __init tauros3_save(void __iomem *base)
 		readl_relaxed(base + L310_PREFETCH_CTRL);
 }
 
-static void tauros3_resume(void)
+static void tauros3_configure(void __iomem *base)
 {
-	void __iomem *base = l2x0_base;
-
-	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) {
-		writel_relaxed(l2x0_saved_regs.aux2_ctrl,
-			       base + TAUROS3_AUX2_CTRL);
-		writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
-			       base + L310_PREFETCH_CTRL);
-
-		l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8);
-	}
+	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+		       base + TAUROS3_AUX2_CTRL);
+	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+		       base + L310_PREFETCH_CTRL);
 }
 
 static const struct l2c_init_data of_tauros3_data __initconst = {
@@ -1603,9 +1621,10 @@ static const struct l2c_init_data of_tauros3_data __initconst = {
 	.num_lock = 8,
 	.enable = l2c_enable,
 	.save  = tauros3_save,
+	.configure = tauros3_configure,
 	/* Tauros3 broadcasts L1 cache operations to L2 */
 	.outer_cache = {
-		.resume      = tauros3_resume,
+		.resume      = l2c_resume,
 	},
 };
 
@@ -1661,6 +1680,10 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	if (!of_property_read_bool(np, "cache-unified"))
 		pr_err("L2C: device tree omits to specify unified cache\n");
 
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
 	/* L2 configuration can only be changed if the cache is disabled */
 	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
 		if (data->of_parse)
@@ -1671,8 +1694,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
 	else
 		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
 
-	__l2c_init(data, aux_val, aux_mask, cache_id);
-
-	return 0;
+	return __l2c_init(data, aux_val, aux_mask, cache_id);
 }
 #endif

+ 11 - 15
arch/arm/mm/context.c

@@ -144,21 +144,17 @@ static void flush_context(unsigned int cpu)
 	/* Update the list of reserved ASIDs and the ASID bitmap. */
 	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
 	for_each_possible_cpu(i) {
-		if (i == cpu) {
-			asid = 0;
-		} else {
-			asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
-			/*
-			 * If this CPU has already been through a
-			 * rollover, but hasn't run another task in
-			 * the meantime, we must preserve its reserved
-			 * ASID, as this is the only trace we have of
-			 * the process it is still running.
-			 */
-			if (asid == 0)
-				asid = per_cpu(reserved_asids, i);
-			__set_bit(asid & ~ASID_MASK, asid_map);
-		}
+		asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
 		per_cpu(reserved_asids, i) = asid;
 	}
 

+ 3 - 0
arch/arm/mm/dma-mapping.c

@@ -2025,6 +2025,9 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
 {
 	struct dma_iommu_mapping *mapping = dev->archdata.mapping;
 
+	if (!mapping)
+		return;
+
 	arm_iommu_detach_device(dev);
 	arm_iommu_release_mapping(mapping);
 }

+ 2 - 7
arch/arm/mm/dump.c

@@ -220,9 +220,6 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, u
 	static const char units[] = "KMGTPE";
 	u64 prot = val & pg_level[level].mask;
 
-	if (addr < USER_PGTABLES_CEILING)
-		return;
-
 	if (!st->level) {
 		st->level = level;
 		st->current_prot = prot;
@@ -308,15 +305,13 @@ static void walk_pgd(struct seq_file *m)
 	pgd_t *pgd = swapper_pg_dir;
 	struct pg_state st;
 	unsigned long addr;
-	unsigned i, pgdoff = USER_PGTABLES_CEILING / PGDIR_SIZE;
+	unsigned i;
 
 	memset(&st, 0, sizeof(st));
 	st.seq = m;
 	st.marker = address_markers;
 
-	pgd += pgdoff;
-
-	for (i = pgdoff; i < PTRS_PER_PGD; i++, pgd++) {
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
 		addr = i * PGDIR_SIZE;
 		if (!pgd_none(*pgd)) {
 			walk_pud(&st, pgd, addr);

+ 3 - 6
arch/arm/mm/init.c

@@ -319,10 +319,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 
 	early_init_fdt_scan_reserved_mem();
 
-	/*
-	 * reserve memory for DMA contigouos allocations,
-	 * must come from DMA area inside low memory
-	 */
+	/* reserve memory for DMA contiguous allocations */
 	dma_contiguous_reserve(arm_dma_limit);
 
 	arm_memblock_steal_permitted = false;
@@ -658,8 +655,8 @@ static struct section_perm ro_perms[] = {
 		.start  = (unsigned long)_stext,
 		.end    = (unsigned long)__init_begin,
 #ifdef CONFIG_ARM_LPAE
-		.mask   = ~PMD_SECT_RDONLY,
-		.prot   = PMD_SECT_RDONLY,
+		.mask   = ~L_PMD_SECT_RDONLY,
+		.prot   = L_PMD_SECT_RDONLY,
 #else
 		.mask   = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
 		.prot   = PMD_SECT_APX | PMD_SECT_AP_WRITE,

+ 2 - 2
arch/arm/mm/mmu.c

@@ -1329,8 +1329,8 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
-	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
 	/* Map all the lowmem memory banks. */
 	for_each_memblock(memory, reg) {

+ 1 - 0
arch/arm64/Kconfig

@@ -39,6 +39,7 @@ config ARM64
 	select HARDIRQS_SW_RESEND
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER

+ 19 - 0
arch/arm64/include/asm/bitrev.h

@@ -0,0 +1,19 @@
+#ifndef __ASM_BITREV_H
+#define __ASM_BITREV_H
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	__asm__ ("rbit %w0, %w1" : "=r" (x) : "r" (x));
+	return x;
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	return __arch_bitrev32((u32)x) >> 16;
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __arch_bitrev32((u32)x) >> 24;
+}
+
+#endif

+ 47 - 0
drivers/amba/bus.c

@@ -18,6 +18,7 @@
 #include <linux/pm_domain.h>
 #include <linux/amba/bus.h>
 #include <linux/sizes.h>
+#include <linux/limits.h>
 
 #include <asm/irq.h>
 
@@ -43,6 +44,10 @@ static int amba_match(struct device *dev, struct device_driver *drv)
 	struct amba_device *pcdev = to_amba_device(dev);
 	struct amba_driver *pcdrv = to_amba_driver(drv);
 
+	/* When driver_override is set, only bind to the matching driver */
+	if (pcdev->driver_override)
+		return !strcmp(pcdev->driver_override, drv->name);
+
 	return amba_lookup(pcdrv->id_table, pcdev) != NULL;
 }
 
@@ -59,6 +64,47 @@ static int amba_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return retval;
 }
 
+static ssize_t driver_override_show(struct device *_dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct amba_device *dev = to_amba_device(_dev);
+
+	if (!dev->driver_override)
+		return 0;
+
+	return sprintf(buf, "%s\n", dev->driver_override);
+}
+
+static ssize_t driver_override_store(struct device *_dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct amba_device *dev = to_amba_device(_dev);
+	char *driver_override, *old = dev->driver_override, *cp;
+
+	if (count > PATH_MAX)
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, count, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
+	cp = strchr(driver_override, '\n');
+	if (cp)
+		*cp = '\0';
+
+	if (strlen(driver_override)) {
+		dev->driver_override = driver_override;
+	} else {
+	       kfree(driver_override);
+	       dev->driver_override = NULL;
+	}
+
+	kfree(old);
+
+	return count;
+}
+
 #define amba_attr_func(name,fmt,arg...)					\
 static ssize_t name##_show(struct device *_dev,				\
 			   struct device_attribute *attr, char *buf)	\
@@ -81,6 +127,7 @@ amba_attr_func(resource, "\t%016llx\t%016llx\t%016lx\n",
 static struct device_attribute amba_dev_attrs[] = {
 	__ATTR_RO(id),
 	__ATTR_RO(resource),
+	__ATTR_RW(driver_override),
 	__ATTR_NULL,
 };
 

+ 7 - 0
drivers/clocksource/Kconfig

@@ -229,4 +229,11 @@ config CLKSRC_MIPS_GIC
 	depends on MIPS_GIC
 	select CLKSRC_OF
 
+config CLKSRC_PXA
+	def_bool y if ARCH_PXA || ARCH_SA1100
+	select CLKSRC_OF if USE_OF
+	help
+	  This enables OST0 support available on PXA and SA-11x0
+	  platforms.
+
 endmenu

+ 1 - 1
drivers/clocksource/Makefile

@@ -21,7 +21,7 @@ obj-$(CONFIG_ARCH_CLPS711X)	+= clps711x-timer.o
 obj-$(CONFIG_ARCH_MARCO)	+= timer-marco.o
 obj-$(CONFIG_ARCH_MOXART)	+= moxart_timer.o
 obj-$(CONFIG_ARCH_MXS)		+= mxs_timer.o
-obj-$(CONFIG_ARCH_PXA)		+= pxa_timer.o
+obj-$(CONFIG_CLKSRC_PXA)	+= pxa_timer.o
 obj-$(CONFIG_ARCH_PRIMA2)	+= timer-prima2.o
 obj-$(CONFIG_ARCH_U300)		+= timer-u300.o
 obj-$(CONFIG_SUN4I_TIMER)	+= sun4i_timer.o

+ 198 - 1
drivers/gpio/gpio-sa1100.c

@@ -11,6 +11,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/syscore_ops.h>
 #include <mach/hardware.h>
 #include <mach/irqs.h>
 
@@ -50,7 +51,7 @@ static int sa1100_direction_output(struct gpio_chip *chip, unsigned offset, int
 
 static int sa1100_to_irq(struct gpio_chip *chip, unsigned offset)
 {
-	return offset < 11 ? (IRQ_GPIO0 + offset) : (IRQ_GPIO11 - 11 + offset);
+	return IRQ_GPIO0 + offset;
 }
 
 static struct gpio_chip sa1100_gpio_chip = {
@@ -64,7 +65,203 @@ static struct gpio_chip sa1100_gpio_chip = {
 	.ngpio			= GPIO_MAX + 1,
 };
 
+/*
+ * SA1100 GPIO edge detection for IRQs:
+ * IRQs are generated on Falling-Edge, Rising-Edge, or both.
+ * Use this instead of directly setting GRER/GFER.
+ */
+static int GPIO_IRQ_rising_edge;
+static int GPIO_IRQ_falling_edge;
+static int GPIO_IRQ_mask;
+
+static int sa1100_gpio_type(struct irq_data *d, unsigned int type)
+{
+	unsigned int mask;
+
+	mask = BIT(d->hwirq);
+
+	if (type == IRQ_TYPE_PROBE) {
+		if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask)
+			return 0;
+		type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING;
+	}
+
+	if (type & IRQ_TYPE_EDGE_RISING)
+		GPIO_IRQ_rising_edge |= mask;
+	else
+		GPIO_IRQ_rising_edge &= ~mask;
+	if (type & IRQ_TYPE_EDGE_FALLING)
+		GPIO_IRQ_falling_edge |= mask;
+	else
+		GPIO_IRQ_falling_edge &= ~mask;
+
+	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
+	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
+
+	return 0;
+}
+
+/*
+ * GPIO IRQs must be acknowledged.
+ */
+static void sa1100_gpio_ack(struct irq_data *d)
+{
+	GEDR = BIT(d->hwirq);
+}
+
+static void sa1100_gpio_mask(struct irq_data *d)
+{
+	unsigned int mask = BIT(d->hwirq);
+
+	GPIO_IRQ_mask &= ~mask;
+
+	GRER &= ~mask;
+	GFER &= ~mask;
+}
+
+static void sa1100_gpio_unmask(struct irq_data *d)
+{
+	unsigned int mask = BIT(d->hwirq);
+
+	GPIO_IRQ_mask |= mask;
+
+	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
+	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
+}
+
+static int sa1100_gpio_wake(struct irq_data *d, unsigned int on)
+{
+	if (on)
+		PWER |= BIT(d->hwirq);
+	else
+		PWER &= ~BIT(d->hwirq);
+	return 0;
+}
+
+/*
+ * This is for GPIO IRQs
+ */
+static struct irq_chip sa1100_gpio_irq_chip = {
+	.name		= "GPIO",
+	.irq_ack	= sa1100_gpio_ack,
+	.irq_mask	= sa1100_gpio_mask,
+	.irq_unmask	= sa1100_gpio_unmask,
+	.irq_set_type	= sa1100_gpio_type,
+	.irq_set_wake	= sa1100_gpio_wake,
+};
+
+static int sa1100_gpio_irqdomain_map(struct irq_domain *d,
+		unsigned int irq, irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip,
+				 handle_edge_irq);
+	set_irq_flags(irq, IRQF_VALID | IRQF_PROBE);
+
+	return 0;
+}
+
+static struct irq_domain_ops sa1100_gpio_irqdomain_ops = {
+	.map = sa1100_gpio_irqdomain_map,
+	.xlate = irq_domain_xlate_onetwocell,
+};
+
+static struct irq_domain *sa1100_gpio_irqdomain;
+
+/*
+ * IRQ 0-11 (GPIO) handler.  We enter here with the
+ * irq_controller_lock held, and IRQs disabled.  Decode the IRQ
+ * and call the handler.
+ */
+static void
+sa1100_gpio_handler(unsigned int irq, struct irq_desc *desc)
+{
+	unsigned int mask;
+
+	mask = GEDR;
+	do {
+		/*
+		 * clear down all currently active IRQ sources.
+		 * We will be processing them all.
+		 */
+		GEDR = mask;
+
+		irq = IRQ_GPIO0;
+		do {
+			if (mask & 1)
+				generic_handle_irq(irq);
+			mask >>= 1;
+			irq++;
+		} while (mask);
+
+		mask = GEDR;
+	} while (mask);
+}
+
+static int sa1100_gpio_suspend(void)
+{
+	/*
+	 * Set the appropriate edges for wakeup.
+	 */
+	GRER = PWER & GPIO_IRQ_rising_edge;
+	GFER = PWER & GPIO_IRQ_falling_edge;
+
+	/*
+	 * Clear any pending GPIO interrupts.
+	 */
+	GEDR = GEDR;
+
+	return 0;
+}
+
+static void sa1100_gpio_resume(void)
+{
+	GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask;
+	GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask;
+}
+
+static struct syscore_ops sa1100_gpio_syscore_ops = {
+	.suspend	= sa1100_gpio_suspend,
+	.resume		= sa1100_gpio_resume,
+};
+
+static int __init sa1100_gpio_init_devicefs(void)
+{
+	register_syscore_ops(&sa1100_gpio_syscore_ops);
+	return 0;
+}
+
+device_initcall(sa1100_gpio_init_devicefs);
+
 void __init sa1100_init_gpio(void)
 {
+	/* clear all GPIO edge detects */
+	GFER = 0;
+	GRER = 0;
+	GEDR = -1;
+
 	gpiochip_add(&sa1100_gpio_chip);
+
+	sa1100_gpio_irqdomain = irq_domain_add_simple(NULL,
+			28, IRQ_GPIO0,
+			&sa1100_gpio_irqdomain_ops, NULL);
+
+	/*
+	 * Install handlers for GPIO 0-10 edge detect interrupts
+	 */
+	irq_set_chained_handler(IRQ_GPIO0_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO1_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO2_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO3_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO4_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO5_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO6_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO7_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO8_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO9_SC, sa1100_gpio_handler);
+	irq_set_chained_handler(IRQ_GPIO10_SC, sa1100_gpio_handler);
+	/*
+	 * Install handler for GPIO 11-27 edge detect interrupts
+	 */
+	irq_set_chained_handler(IRQ_GPIO11_27, sa1100_gpio_handler);
+
 }

+ 9 - 4
include/linux/amba/bus.h

@@ -33,6 +33,7 @@ struct amba_device {
 	struct clk		*pclk;
 	unsigned int		periphid;
 	unsigned int		irq[AMBA_NR_IRQS];
+	char			*driver_override;
 };
 
 struct amba_driver {
@@ -92,11 +93,15 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int
 int amba_request_regions(struct amba_device *, const char *);
 void amba_release_regions(struct amba_device *);
 
-#define amba_pclk_enable(d)	\
-	(IS_ERR((d)->pclk) ? 0 : clk_enable((d)->pclk))
+static inline int amba_pclk_enable(struct amba_device *dev)
+{
+	return clk_enable(dev->pclk);
+}
 
-#define amba_pclk_disable(d)	\
-	do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0)
+static inline void amba_pclk_disable(struct amba_device *dev)
+{
+	clk_disable(dev->pclk);
+}
 
 static inline int amba_pclk_prepare(struct amba_device *dev)
 {

+ 73 - 4
include/linux/bitrev.h

@@ -3,14 +3,83 @@
 
 #include <linux/types.h>
 
-extern u8 const byte_rev_table[256];
+#ifdef CONFIG_HAVE_ARCH_BITREVERSE
+#include <asm/bitrev.h>
+
+#define __bitrev32 __arch_bitrev32
+#define __bitrev16 __arch_bitrev16
+#define __bitrev8 __arch_bitrev8
 
-static inline u8 bitrev8(u8 byte)
+#else
+extern u8 const byte_rev_table[256];
+static inline u8 __bitrev8(u8 byte)
 {
 	return byte_rev_table[byte];
 }
 
-extern u16 bitrev16(u16 in);
-extern u32 bitrev32(u32 in);
+static inline u16 __bitrev16(u16 x)
+{
+	return (__bitrev8(x & 0xff) << 8) | __bitrev8(x >> 8);
+}
+
+static inline u32 __bitrev32(u32 x)
+{
+	return (__bitrev16(x & 0xffff) << 16) | __bitrev16(x >> 16);
+}
+
+#endif /* CONFIG_HAVE_ARCH_BITREVERSE */
+
+#define __constant_bitrev32(x)	\
+({					\
+	u32 __x = x;			\
+	__x = (__x >> 16) | (__x << 16);	\
+	__x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8);	\
+	__x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4);	\
+	__x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2);	\
+	__x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1);	\
+	__x;								\
+})
+
+#define __constant_bitrev16(x)	\
+({					\
+	u16 __x = x;			\
+	__x = (__x >> 8) | (__x << 8);	\
+	__x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4);	\
+	__x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2);	\
+	__x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1);	\
+	__x;								\
+})
+
+#define __constant_bitrev8(x)	\
+({					\
+	u8 __x = x;			\
+	__x = (__x >> 4) | (__x << 4);	\
+	__x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2);	\
+	__x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1);	\
+	__x;								\
+})
+
+#define bitrev32(x) \
+({			\
+	u32 __x = x;	\
+	__builtin_constant_p(__x) ?	\
+	__constant_bitrev32(__x) :			\
+	__bitrev32(__x);				\
+})
+
+#define bitrev16(x) \
+({			\
+	u16 __x = x;	\
+	__builtin_constant_p(__x) ?	\
+	__constant_bitrev16(__x) :			\
+	__bitrev16(__x);				\
+ })
 
+#define bitrev8(x) \
+({			\
+	u8 __x = x;	\
+	__builtin_constant_p(__x) ?	\
+	__constant_bitrev8(__x) :			\
+	__bitrev8(__x)	;			\
+ })
 #endif /* _LINUX_BITREV_H */

+ 9 - 0
lib/Kconfig

@@ -13,6 +13,15 @@ config RAID6_PQ
 config BITREVERSE
 	tristate
 
+config HAVE_ARCH_BITREVERSE
+	boolean
+	default n
+	depends on BITREVERSE
+	help
+	  This option is selected by architectures that have an instruction
+	  which can perform a bit-reverse operation; the hardware instruction
+	  is used in place of the generic implementation when it is available.
+
 config RATIONAL
 	boolean
 

+ 2 - 15
lib/bitrev.c

@@ -1,3 +1,4 @@
+#ifndef CONFIG_HAVE_ARCH_BITREVERSE
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/bitrev.h>
@@ -42,18 +43,4 @@ const u8 byte_rev_table[256] = {
 };
 EXPORT_SYMBOL_GPL(byte_rev_table);
 
-u16 bitrev16(u16 x)
-{
-	return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8);
-}
-EXPORT_SYMBOL(bitrev16);
-
-/**
- * bitrev32 - reverse the order of bits in a u32 value
- * @x: value to be bit-reversed
- */
-u32 bitrev32(u32 x)
-{
-	return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16);
-}
-EXPORT_SYMBOL(bitrev32);
+#endif /* CONFIG_HAVE_ARCH_BITREVERSE */