
Merge tag 'openrisc-for-linus' of git://github.com/openrisc/linux

Pull OpenRISC updates from Stafford Horne:
 "Highlights include:

   - optimized memset and memcpy routines, ~20% boot time saving

   - support for cpu idling

   - adding support for l.swa and l.lwa atomic operations (in spec from
     2014)

   - use atomics to implement: bitops, cmpxchg, futex

   - the atomics are in preparation for SMP support"

* tag 'openrisc-for-linus' of git://github.com/openrisc/linux: (25 commits)
  openrisc: head: Init r0 to 0 on start
  openrisc: Export ioremap symbols used by modules
  arch/openrisc/lib/memcpy.c: use correct OR1200 option
  openrisc: head: Remove unused strings
  openrisc: head: Move init strings to rodata section
  openrisc: entry: Fix delay slot detection
  openrisc: entry: Whitespace and comment cleanups
  scripts/checkstack.pl: Add openrisc support
  MAINTAINERS: Add the openrisc official repository
  openrisc: Add .gitignore
  openrisc: Add optimized memcpy routine
  openrisc: Add optimized memset
  openrisc: Initial support for the idle state
  openrisc: Fix the bitmask for the unit present register
  openrisc: remove unnecessary stddef.h include
  openrisc: add futex_atomic_* implementations
  openrisc: add optimized atomic operations
  openrisc: add cmpxchg and xchg implementations
  openrisc: add atomic bitops
  openrisc: add l.lwa/l.swa emulation
  ...
Linus Torvalds 8 years ago
parent
commit
9e31489029

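Before the file-by-file diff, a minimal, hypothetical consumer of the primitives this series implements (the function names are the standard kernel APIs; the example structure itself is invented and not part of the commit). On OpenRISC these calls now end up in the l.lwa/l.swa retry loops added under arch/openrisc/include/asm/ below:

#include <linux/atomic.h>

static atomic_t example_users = ATOMIC_INIT(0);	/* hypothetical counter */

static int example_get(void)
{
	/* expands to an l.lwa/l.swa retry loop instead of the asm-generic fallback */
	return atomic_add_return(1, &example_users);
}

static int example_get_if_active(void)
{
	/* atomic_inc_not_zero() is built on __atomic_add_unless(), also added below;
	 * it only increments while the counter is non-zero.
	 */
	return atomic_inc_not_zero(&example_users);
}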
+ 1 - 0
MAINTAINERS

@@ -9315,6 +9315,7 @@ OPENRISC ARCHITECTURE
 M:	Jonas Bonn <jonas@southpole.se>
 M:	Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
 M:	Stafford Horne <shorne@gmail.com>
+T:	git git://github.com/openrisc/linux.git
 L:	openrisc@lists.librecores.org
 W:	http://openrisc.io
 S:	Maintained

+ 1 - 0
arch/openrisc/Kconfig

@@ -12,6 +12,7 @@ config OPENRISC
 	select HAVE_MEMBLOCK
 	select GPIOLIB
         select HAVE_ARCH_TRACEHOOK
+	select SPARSE_IRQ
 	select GENERIC_IRQ_CHIP
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW

+ 0 - 1
arch/openrisc/TODO.openrisc

@@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list:
    or1k and this change is slowly trickling through the stack.  For the time
    being, or32 is equivalent to or1k.
 
--- Implement optimized version of memcpy and memset

+ 1 - 4
arch/openrisc/include/asm/Kbuild

@@ -1,7 +1,6 @@
 
 header-y += ucontext.h
 
-generic-y += atomic.h
 generic-y += auxvec.h
 generic-y += barrier.h
 generic-y += bitsperlong.h
@@ -10,8 +9,6 @@ generic-y += bugs.h
 generic-y += cacheflush.h
 generic-y += checksum.h
 generic-y += clkdev.h
-generic-y += cmpxchg-local.h
-generic-y += cmpxchg.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
@@ -22,12 +19,12 @@ generic-y += exec.h
 generic-y += fb.h
 generic-y += fcntl.h
 generic-y += ftrace.h
-generic-y += futex.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ioctls.h
 generic-y += ipcbuf.h
+generic-y += irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h

+ 126 - 0
arch/openrisc/include/asm/atomic.h

@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_ATOMIC_H
+#define __ASM_OPENRISC_ATOMIC_H
+
+#include <linux/types.h>
+
+/* Atomically perform op with v->counter and i */
+#define ATOMIC_OP(op)							\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	int tmp;							\
+									\
+	__asm__ __volatile__(						\
+		"1:	l.lwa	%0,0(%1)	\n"			\
+		"	l." #op " %0,%0,%2	\n"			\
+		"	l.swa	0(%1),%0	\n"			\
+		"	l.bnf	1b		\n"			\
+		"	 l.nop			\n"			\
+		: "=&r"(tmp)						\
+		: "r"(&v->counter), "r"(i)				\
+		: "cc", "memory");					\
+}
+
+/* Atomically perform op with v->counter and i, return the result */
+#define ATOMIC_OP_RETURN(op)						\
+static inline int atomic_##op##_return(int i, atomic_t *v)		\
+{									\
+	int tmp;							\
+									\
+	__asm__ __volatile__(						\
+		"1:	l.lwa	%0,0(%1)	\n"			\
+		"	l." #op " %0,%0,%2	\n"			\
+		"	l.swa	0(%1),%0	\n"			\
+		"	l.bnf	1b		\n"			\
+		"	 l.nop			\n"			\
+		: "=&r"(tmp)						\
+		: "r"(&v->counter), "r"(i)				\
+		: "cc", "memory");					\
+									\
+	return tmp;							\
+}
+
+/* Atomically perform op with v->counter and i, return orig v->counter */
+#define ATOMIC_FETCH_OP(op)						\
+static inline int atomic_fetch_##op(int i, atomic_t *v)			\
+{									\
+	int tmp, old;							\
+									\
+	__asm__ __volatile__(						\
+		"1:	l.lwa	%0,0(%2)	\n"			\
+		"	l." #op " %1,%0,%3	\n"			\
+		"	l.swa	0(%2),%1	\n"			\
+		"	l.bnf	1b		\n"			\
+		"	 l.nop			\n"			\
+		: "=&r"(old), "=&r"(tmp)				\
+		: "r"(&v->counter), "r"(i)				\
+		: "cc", "memory");					\
+									\
+	return old;							\
+}
+
+ATOMIC_OP_RETURN(add)
+ATOMIC_OP_RETURN(sub)
+
+ATOMIC_FETCH_OP(add)
+ATOMIC_FETCH_OP(sub)
+ATOMIC_FETCH_OP(and)
+ATOMIC_FETCH_OP(or)
+ATOMIC_FETCH_OP(xor)
+
+ATOMIC_OP(and)
+ATOMIC_OP(or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#define atomic_add_return	atomic_add_return
+#define atomic_sub_return	atomic_sub_return
+#define atomic_fetch_add	atomic_fetch_add
+#define atomic_fetch_sub	atomic_fetch_sub
+#define atomic_fetch_and	atomic_fetch_and
+#define atomic_fetch_or		atomic_fetch_or
+#define atomic_fetch_xor	atomic_fetch_xor
+#define atomic_and	atomic_and
+#define atomic_or	atomic_or
+#define atomic_xor	atomic_xor
+
+/*
+ * Atomically add a to v->counter as long as v is not already u.
+ * Returns the original value at v->counter.
+ *
+ * This is often used through atomic_inc_not_zero()
+ */
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+	int old, tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa %0, 0(%2)		\n"
+		"	l.sfeq %0, %4		\n"
+		"	l.bf 2f			\n"
+		"	 l.add %1, %0, %3	\n"
+		"	l.swa 0(%2), %1		\n"
+		"	l.bnf 1b		\n"
+		"	 l.nop			\n"
+		"2:				\n"
+		: "=&r"(old), "=&r" (tmp)
+		: "r"(&v->counter), "r"(a), "r"(u)
+		: "cc", "memory");
+
+	return old;
+}
+#define __atomic_add_unless	__atomic_add_unless
+
+#include <asm-generic/atomic.h>
+
+#endif /* __ASM_OPENRISC_ATOMIC_H */

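For readers skimming the macro block above: ATOMIC_OP_RETURN(add) expands to roughly the following function, a load-linked/store-conditional retry loop over v->counter (expansion shown for illustration only, not an extra file in the commit):

static inline int atomic_add_return(int i, atomic_t *v)
{
	int tmp;

	__asm__ __volatile__(
		"1:	l.lwa	%0,0(%1)	\n"	/* load v->counter and set the link */
		"	l.add	%0,%0,%2	\n"	/* tmp += i */
		"	l.swa	0(%1),%0	\n"	/* store succeeds only if the link still holds */
		"	l.bnf	1b		\n"	/* flag clear => store failed, retry */
		"	 l.nop			\n"
		: "=&r"(tmp)
		: "r"(&v->counter), "r"(i)
		: "cc", "memory");

	return tmp;
}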
+ 1 - 1
arch/openrisc/include/asm/bitops.h

@@ -45,7 +45,7 @@
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/lock.h>
 
-#include <asm-generic/bitops/atomic.h>
+#include <asm/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
 #include <asm-generic/bitops/le.h>
 #include <asm-generic/bitops/ext2-atomic.h>

+ 123 - 0
arch/openrisc/include/asm/bitops/atomic.h

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H
+#define __ASM_OPENRISC_BITOPS_ATOMIC_H
+
+static inline void set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%1)	\n"
+		"	l.or	%0,%0,%2	\n"
+		"	l.swa	0(%1),%0	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(tmp)
+		: "r"(p), "r"(mask)
+		: "cc", "memory");
+}
+
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%1)	\n"
+		"	l.and	%0,%0,%2	\n"
+		"	l.swa	0(%1),%0	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(tmp)
+		: "r"(p), "r"(~mask)
+		: "cc", "memory");
+}
+
+static inline void change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%1)	\n"
+		"	l.xor	%0,%0,%2	\n"
+		"	l.swa	0(%1),%0	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(tmp)
+		: "r"(p), "r"(mask)
+		: "cc", "memory");
+}
+
+static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old;
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%2)	\n"
+		"	l.or	%1,%0,%3	\n"
+		"	l.swa	0(%2),%1	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"(mask)
+		: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old;
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%2)	\n"
+		"	l.and	%1,%0,%3	\n"
+		"	l.swa	0(%2),%1	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"(~mask)
+		: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old;
+	unsigned long tmp;
+
+	__asm__ __volatile__(
+		"1:	l.lwa	%0,0(%2)	\n"
+		"	l.xor	%1,%0,%3	\n"
+		"	l.swa	0(%2),%1	\n"
+		"	l.bnf	1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(old), "=&r"(tmp)
+		: "r"(p), "r"(mask)
+		: "cc", "memory");
+
+	return (old & mask) != 0;
+}
+
+#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */

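The atomic bitops above are drop-in replacements for the asm-generic spinlock-based versions. A typical (hypothetical) use is a one-bit ownership flag; the names below are invented for illustration and are not from the commit:

#include <linux/bitops.h>
#include <linux/errno.h>

#define EXAMPLE_BUSY	0			/* hypothetical bit number */
static unsigned long example_state;		/* hypothetical flag word */

static int example_claim(void)
{
	/* test_and_set_bit() atomically returns the previous bit value */
	if (test_and_set_bit(EXAMPLE_BUSY, &example_state))
		return -EBUSY;			/* already claimed elsewhere */
	return 0;
}

static void example_release(void)
{
	clear_bit(EXAMPLE_BUSY, &example_state);
}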
+ 83 - 0
arch/openrisc/include/asm/cmpxchg.h

@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_CMPXCHG_H
+#define __ASM_OPENRISC_CMPXCHG_H
+
+#include  <linux/types.h>
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long
+__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+{
+	if (size != 4) {
+		__cmpxchg_called_with_bad_pointer();
+		return old;
+	}
+
+	__asm__ __volatile__(
+		"1:	l.lwa %0, 0(%1)		\n"
+		"	l.sfeq %0, %2		\n"
+		"	l.bnf 2f		\n"
+		"	 l.nop			\n"
+		"	l.swa 0(%1), %3		\n"
+		"	l.bnf 1b		\n"
+		"	 l.nop			\n"
+		"2:				\n"
+		: "=&r"(old)
+		: "r"(ptr), "r"(old), "r"(new)
+		: "cc", "memory");
+
+	return old;
+}
+
+#define cmpxchg(ptr, o, n)						\
+	({								\
+		(__typeof__(*(ptr))) __cmpxchg((ptr),			\
+					       (unsigned long)(o),	\
+					       (unsigned long)(n),	\
+					       sizeof(*(ptr)));		\
+	})
+
+/*
+ * This function doesn't exist, so you'll get a linker error if
+ * something tries to do an invalidly-sized xchg().
+ */
+extern void __xchg_called_with_bad_pointer(void);
+
+static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
+				   int size)
+{
+	if (size != 4) {
+		__xchg_called_with_bad_pointer();
+		return val;
+	}
+
+	__asm__ __volatile__(
+		"1:	l.lwa %0, 0(%1)		\n"
+		"	l.swa 0(%1), %2		\n"
+		"	l.bnf 1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(val)
+		: "r"(ptr), "r"(val)
+		: "cc", "memory");
+
+	return val;
+}
+
+#define xchg(ptr, with) \
+	((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
+
+#endif /* __ASM_OPENRISC_CMPXCHG_H */

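As a quick reference, cmpxchg()/xchg() above follow the usual kernel semantics: cmpxchg() stores new only if the current value equals old and always returns the value it found. A common retry-loop consumer looks like this hypothetical helper (illustration only, not part of the commit):

#include <linux/atomic.h>

/* Lock-free "remember the largest value seen" helper. */
static void example_track_max(atomic_t *max, int sample)
{
	int cur = atomic_read(max);

	while (sample > cur) {
		int old = atomic_cmpxchg(max, cur, sample);	/* maps onto __cmpxchg() above */

		if (old == cur)
			break;		/* our value was installed */
		cur = old;		/* lost the race, re-evaluate */
	}
}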
+ 2 - 0
arch/openrisc/include/asm/cpuinfo.h

@@ -24,9 +24,11 @@ struct cpuinfo {
 
 	u32 icache_size;
 	u32 icache_block_size;
+	u32 icache_ways;
 
 	u32 dcache_size;
 	u32 dcache_block_size;
+	u32 dcache_ways;
 };
 
 extern struct cpuinfo cpuinfo;

+ 135 - 0
arch/openrisc/include/asm/futex.h

@@ -0,0 +1,135 @@
+#ifndef __ASM_OPENRISC_FUTEX_H
+#define __ASM_OPENRISC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <asm/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+({								\
+	__asm__ __volatile__ (					\
+		"1:	l.lwa	%0, %2			\n"	\
+			insn				"\n"	\
+		"2:	l.swa	%2, %1			\n"	\
+		"	l.bnf	1b			\n"	\
+		"	 l.ori	%1, r0, 0		\n"	\
+		"3:					\n"	\
+		".section .fixup,\"ax\"			\n"	\
+		"4:	l.j	3b			\n"	\
+		"	 l.addi	%1, r0, %3		\n"	\
+		".previous				\n"	\
+		".section __ex_table,\"a\"		\n"	\
+		".word	1b,4b,2b,4b			\n"	\
+		".previous				\n"	\
+		: "=&r" (oldval), "=&r" (ret), "+m" (*uaddr)	\
+		: "i" (-EFAULT), "r" (oparg)			\
+		: "cc", "memory"				\
+		);						\
+})
+
+static inline int
+futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
+{
+	int op = (encoded_op >> 28) & 7;
+	int cmp = (encoded_op >> 24) & 15;
+	int oparg = (encoded_op << 8) >> 20;
+	int cmparg = (encoded_op << 20) >> 20;
+	int oldval = 0, ret;
+
+	if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+		oparg = 1 << oparg;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	pagefault_disable();
+
+	switch (op) {
+	case FUTEX_OP_SET:
+		__futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ADD:
+		__futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_OR:
+		__futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg);
+		break;
+	case FUTEX_OP_ANDN:
+		__futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg);
+		break;
+	case FUTEX_OP_XOR:
+		__futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	pagefault_enable();
+
+	if (!ret) {
+		switch (cmp) {
+		case FUTEX_OP_CMP_EQ:
+			ret = (oldval == cmparg);
+			break;
+		case FUTEX_OP_CMP_NE:
+			ret = (oldval != cmparg);
+			break;
+		case FUTEX_OP_CMP_LT:
+			ret = (oldval < cmparg);
+			break;
+		case FUTEX_OP_CMP_GE:
+			ret = (oldval >= cmparg);
+			break;
+		case FUTEX_OP_CMP_LE:
+			ret = (oldval <= cmparg);
+			break;
+		case FUTEX_OP_CMP_GT:
+			ret = (oldval > cmparg);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
+	}
+	return ret;
+}
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	int ret = 0;
+	u32 prev;
+
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+		return -EFAULT;
+
+	__asm__ __volatile__ (				\
+		"1:	l.lwa	%1, %2		\n"	\
+		"	l.sfeq	%1, %3		\n"	\
+		"	l.bnf	3f		\n"	\
+		"	 l.nop			\n"	\
+		"2:	l.swa	%2, %4		\n"	\
+		"	l.bnf	1b		\n"	\
+		"	 l.nop			\n"	\
+		"3:				\n"	\
+		".section .fixup,\"ax\"		\n"	\
+		"4:	l.j	3b		\n"	\
+		"	 l.addi	%0, r0, %5	\n"	\
+		".previous			\n"	\
+		".section __ex_table,\"a\"	\n"	\
+		".word	1b,4b,2b,4b		\n"	\
+		".previous			\n"	\
+		: "+r" (ret), "=&r" (prev), "+m" (*uaddr) \
+		: "r" (oldval), "r" (newval), "i" (-EFAULT) \
+		: "cc",	"memory"			\
+		);
+
+	*uval = prev;
+	return ret;
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASM_OPENRISC_FUTEX_H */

+ 2 - 2
arch/openrisc/include/asm/spr_defs.h

@@ -152,8 +152,8 @@
 #define SPR_UPR_MP	   0x00000020  /* MAC present */
 #define SPR_UPR_DUP	   0x00000040  /* Debug unit present */
 #define SPR_UPR_PCUP	   0x00000080  /* Performance counters unit present */
-#define SPR_UPR_PMP	   0x00000100  /* Power management present */
-#define SPR_UPR_PICP	   0x00000200  /* PIC present */
+#define SPR_UPR_PICP	   0x00000100  /* PIC present */
+#define SPR_UPR_PMP	   0x00000200  /* Power management present */
 #define SPR_UPR_TTP	   0x00000400  /* Tick timer present */
 #define SPR_UPR_RES	   0x00fe0000  /* Reserved */
 #define SPR_UPR_CUP	   0xff000000  /* Context units present */

+ 10 - 0
arch/openrisc/include/asm/string.h

@@ -0,0 +1,10 @@
+#ifndef __ASM_OPENRISC_STRING_H
+#define __ASM_OPENRISC_STRING_H
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *s, int c, __kernel_size_t n);
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *dest, __const void *src, __kernel_size_t n);
+
+#endif /* __ASM_OPENRISC_STRING_H */

+ 1 - 0
arch/openrisc/kernel/.gitignore

@@ -0,0 +1 @@
+vmlinux.lds

+ 38 - 22
arch/openrisc/kernel/entry.S

@@ -173,6 +173,11 @@ handler:							;\
 	l.j	_ret_from_exception				;\
 	 l.nop
 
+/* clobbers 'reg' */
+#define CLEAR_LWA_FLAG(reg)		\
+	l.movhi	reg,hi(lwa_flag)	;\
+	l.ori	reg,reg,lo(lwa_flag)	;\
+	l.sw	0(reg),r0
 /*
  * NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR
  *       contain the same values as when exception we're handling
@@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start)
 /* ---[ 0x200: BUS exception ]------------------------------------------- */
 
 EXCEPTION_ENTRY(_bus_fault_handler)
+	CLEAR_LWA_FLAG(r3)
 	/* r4: EA of fault (set by EXCEPTION_HANDLE) */
 	l.jal   do_bus_fault
 	 l.addi  r3,r1,0 /* pt_regs */
@@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler)
 
 /* ---[ 0x300: Data Page Fault exception ]------------------------------- */
 EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler)
+	CLEAR_LWA_FLAG(r3)
 	l.and	r5,r5,r0
 	l.j	1f
 	 l.nop
 
 EXCEPTION_ENTRY(_data_page_fault_handler)
+	CLEAR_LWA_FLAG(r3)
 	/* set up parameters for do_page_fault */
 	l.ori	r5,r0,0x300		   // exception vector
 1:
@@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 	 * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part
 	 */
 #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
-	l.lwz   r6,PT_PC(r3)                  // address of an offending insn
+	l.lwz   r6,PT_PC(r3)               // address of an offending insn
 	l.lwz   r6,0(r6)                   // instruction that caused pf
 
 	l.srli  r6,r6,26                   // check opcode for jump insn
@@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
 	l.bf    8f
 	l.sfeqi r6,0x12                    // l.jalr
 	l.bf    8f
-
-	l.nop
+	 l.nop
 
 	l.j     9f
-	l.nop
-8:
+	 l.nop
 
-	l.lwz   r6,PT_PC(r3)                  // address of an offending insn
+8: // offending insn is in delay slot
+	l.lwz   r6,PT_PC(r3)               // address of an offending insn
 	l.addi  r6,r6,4
 	l.lwz   r6,0(r6)                   // instruction that caused pf
 	l.srli  r6,r6,26                   // get opcode
-9:
+9: // offending instruction opcode loaded in r6
 
 #else
 
-	l.mfspr r6,r0,SPR_SR		   // SR
-//	l.lwz	r6,PT_SR(r3)		   // ESR
-	l.andi	r6,r6,SPR_SR_DSX	   // check for delay slot exception
-	l.sfeqi	r6,0x1			   // exception happened in delay slot
-	l.bnf	7f
-	l.lwz	r6,PT_PC(r3)		   // address of an offending insn
+	l.lwz   r6,PT_SR(r3)               // SR
+	l.andi  r6,r6,SPR_SR_DSX           // check for delay slot exception
+	l.sfne  r6,r0                      // exception happened in delay slot
+	l.bnf   7f
+	 l.lwz  r6,PT_PC(r3)               // address of an offending insn
 
-	l.addi	r6,r6,4			   // offending insn is in delay slot
+	l.addi	r6,r6,4                    // offending insn is in delay slot
 7:
 	l.lwz   r6,0(r6)                   // instruction that caused pf
 	l.srli  r6,r6,26                   // check opcode for write access
 #endif
 
-	l.sfgeui r6,0x33		   // check opcode for write access
+	l.sfgeui r6,0x33                   // check opcode for write access
 	l.bnf   1f
 	l.sfleui r6,0x37
 	l.bnf   1f
 	l.ori   r6,r0,0x1                  // write access
 	l.j     2f
-	l.nop
+	 l.nop
 1:	l.ori   r6,r0,0x0                  // !write access
 2:
 
 	/* call fault.c handler in or32/mm/fault.c */
 	l.jal   do_page_fault
-	l.nop
+	 l.nop
 	l.j     _ret_from_exception
-	l.nop
+	 l.nop
 
 /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */
 EXCEPTION_ENTRY(_itlb_miss_page_fault_handler)
+	CLEAR_LWA_FLAG(r3)
 	l.and	r5,r5,r0
 	l.j	1f
 	 l.nop
 
 EXCEPTION_ENTRY(_insn_page_fault_handler)
+	CLEAR_LWA_FLAG(r3)
 	/* set up parameters for do_page_fault */
 	l.ori	r5,r0,0x400		   // exception vector
 1:
@@ -296,14 +304,15 @@ EXCEPTION_ENTRY(_insn_page_fault_handler)
 
 	/* call fault.c handler in or32/mm/fault.c */
 	l.jal   do_page_fault
-	l.nop
+	 l.nop
 	l.j     _ret_from_exception
-	l.nop
+	 l.nop
 
 
 /* ---[ 0x500: Timer exception ]----------------------------------------- */
 
 EXCEPTION_ENTRY(_timer_handler)
+	CLEAR_LWA_FLAG(r3)
 	l.jal	timer_interrupt
 	 l.addi r3,r1,0 /* pt_regs */
 
@@ -313,6 +322,7 @@ EXCEPTION_ENTRY(_timer_handler)
 /* ---[ 0x600: Aligment exception ]-------------------------------------- */
 
 EXCEPTION_ENTRY(_alignment_handler)
+	CLEAR_LWA_FLAG(r3)
 	/* r4: EA of fault (set by EXCEPTION_HANDLE) */
 	l.jal   do_unaligned_access
 	 l.addi  r3,r1,0 /* pt_regs */
@@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
 //	l.sw	PT_SR(r1),r4
 1:
 #endif
+	CLEAR_LWA_FLAG(r3)
 	l.addi	r3,r1,0
 	l.movhi	r8,hi(do_IRQ)
 	l.ori	r8,r8,lo(do_IRQ)
@@ -556,8 +567,12 @@ ENTRY(_sys_call_handler)
 	 * they should be clobbered, otherwise
 	 */
 	l.sw    PT_GPR3(r1),r3
-	/* r4 already saved */
-	/* r4 holds the EEAR address of the fault, load the original r4 */
+	/*
+	 * r4 already saved
+	 * r4 holds the EEAR address of the fault, use it as scratch reg and
+	 * then load the original r4
+	 */
+	CLEAR_LWA_FLAG(r4)
 	l.lwz	r4,PT_GPR4(r1)
 	l.sw    PT_GPR5(r1),r5
 	l.sw    PT_GPR6(r1),r6
@@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00)
 /* ---[ 0xe00: Trap exception ]------------------------------------------ */
 
 EXCEPTION_ENTRY(_trap_handler)
+	CLEAR_LWA_FLAG(r3)
 	/* r4: EA of fault (set by EXCEPTION_HANDLE) */
 	l.jal   do_trap
 	 l.addi  r3,r1,0 /* pt_regs */

+ 76 - 124
arch/openrisc/kernel/head.S

@@ -24,6 +24,7 @@
 #include <asm/page.h>
 #include <asm/mmu.h>
 #include <asm/pgtable.h>
+#include <asm/thread_info.h>
 #include <asm/cache.h>
 #include <asm/spr_defs.h>
 #include <asm/asm-offsets.h>
@@ -34,7 +35,7 @@
 	l.add	rd,rd,rs
 
 #define CLEAR_GPR(gpr)				\
-	l.or    gpr,r0,r0
+	l.movhi	gpr,0x0
 
 #define LOAD_SYMBOL_2_GPR(gpr,symbol)		\
 	l.movhi gpr,hi(symbol)			;\
@@ -442,6 +443,9 @@ _dispatch_do_ipage_fault:
 	__HEAD
 	.global _start
 _start:
+	/* Init r0 to zero as per spec */
+	CLEAR_GPR(r0)
+
 	/* save kernel parameters */
 	l.or	r25,r0,r3	/* pointer to fdt */
 
@@ -486,7 +490,8 @@ _start:
 	/*
 	 * set up initial ksp and current
 	 */
-	LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000)	// setup kernel stack
+	/* setup kernel stack */
+	LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE)
 	LOAD_SYMBOL_2_GPR(r10,init_thread_union)	// setup current
 	tophys	(r31,r10)
 	l.sw	TI_KSP(r31), r1
@@ -520,22 +525,8 @@ enable_dc:
 	 l.nop
 
 flush_tlb:
-	/*
-	 *  I N V A L I D A T E   T L B   e n t r i e s
-	 */
-	LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
-	LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
-	l.addi	r7,r0,128 /* Maximum number of sets */
-1:
-	l.mtspr	r5,r0,0x0
-	l.mtspr	r6,r0,0x0
-
-	l.addi	r5,r5,1
-	l.addi	r6,r6,1
-	l.sfeq	r7,r0
-	l.bnf	1b
-	 l.addi	r7,r7,-1
-
+	l.jal	_flush_tlb
+	 l.nop
 
 /* The MMU needs to be enabled before or32_early_setup is called */
 
@@ -627,6 +618,26 @@ jump_start_kernel:
 	l.jr    r30
 	 l.nop
 
+_flush_tlb:
+	/*
+	 *  I N V A L I D A T E   T L B   e n t r i e s
+	 */
+	LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
+	LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
+	l.addi	r7,r0,128 /* Maximum number of sets */
+1:
+	l.mtspr	r5,r0,0x0
+	l.mtspr	r6,r0,0x0
+
+	l.addi	r5,r5,1
+	l.addi	r6,r6,1
+	l.sfeq	r7,r0
+	l.bnf	1b
+	 l.addi	r7,r7,-1
+
+	l.jr	r9
+	 l.nop
+
 /* ========================================[ cache ]=== */
 
 	/* aligment here so we don't change memory offsets with
@@ -971,8 +982,6 @@ ENTRY(dtlb_miss_handler)
 	EXCEPTION_STORE_GPR2
 	EXCEPTION_STORE_GPR3
 	EXCEPTION_STORE_GPR4
-	EXCEPTION_STORE_GPR5
-	EXCEPTION_STORE_GPR6
 	/*
 	 * get EA of the miss
 	 */
@@ -980,91 +989,70 @@ ENTRY(dtlb_miss_handler)
 	/*
 	 * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
 	 */
-	GET_CURRENT_PGD(r3,r5)		// r3 is current_pgd, r5 is temp
+	GET_CURRENT_PGD(r3,r4)		// r3 is current_pgd, r4 is temp
 	l.srli	r4,r2,0x18		// >> PAGE_SHIFT + (PAGE_SHIFT - 2)
 	l.slli	r4,r4,0x2		// to get address << 2
-	l.add	r5,r4,r3		// r4 is pgd_index(daddr)
+	l.add	r3,r4,r3		// r4 is pgd_index(daddr)
 	/*
 	 * if (pmd_none(*pmd))
 	 *   goto pmd_none:
 	 */
-	tophys	(r4,r5)
+	tophys	(r4,r3)
 	l.lwz	r3,0x0(r4)		// get *pmd value
 	l.sfne	r3,r0
 	l.bnf	d_pmd_none
-	 l.andi	r3,r3,~PAGE_MASK //0x1fff		// ~PAGE_MASK
-	/*
-	 * if (pmd_bad(*pmd))
-	 *   pmd_clear(pmd)
-	 *   goto pmd_bad:
-	 */
-//	l.sfeq	r3,r0			// check *pmd value
-//	l.bf	d_pmd_good
-	l.addi	r3,r0,0xffffe000	// PAGE_MASK
-//	l.j	d_pmd_bad
-//	l.sw	0x0(r4),r0		// clear pmd
+	 l.addi	r3,r0,0xffffe000	// PAGE_MASK
+
 d_pmd_good:
 	/*
 	 * pte = *pte_offset(pmd, daddr);
 	 */
 	l.lwz	r4,0x0(r4)		// get **pmd value
 	l.and	r4,r4,r3		// & PAGE_MASK
-	l.srli	r5,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
-	l.andi	r3,r5,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
+	l.srli	r2,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
+	l.andi	r3,r2,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
 	l.slli	r3,r3,0x2		// to get address << 2
 	l.add	r3,r3,r4
-	l.lwz	r2,0x0(r3)		// this is pte at last
+	l.lwz	r3,0x0(r3)		// this is pte at last
 	/*
 	 * if (!pte_present(pte))
 	 */
-	l.andi	r4,r2,0x1
+	l.andi	r4,r3,0x1
 	l.sfne	r4,r0			// is pte present
 	l.bnf	d_pte_not_present
-	l.addi	r3,r0,0xffffe3fa	// PAGE_MASK | DTLB_UP_CONVERT_MASK
+	l.addi	r4,r0,0xffffe3fa	// PAGE_MASK | DTLB_UP_CONVERT_MASK
 	/*
 	 * fill DTLB TR register
 	 */
-	l.and	r4,r2,r3		// apply the mask
+	l.and	r4,r3,r4		// apply the mask
 	// Determine number of DMMU sets
-	l.mfspr r6, r0, SPR_DMMUCFGR
-	l.andi	r6, r6, SPR_DMMUCFGR_NTS
-	l.srli	r6, r6, SPR_DMMUCFGR_NTS_OFF
+	l.mfspr r2, r0, SPR_DMMUCFGR
+	l.andi	r2, r2, SPR_DMMUCFGR_NTS
+	l.srli	r2, r2, SPR_DMMUCFGR_NTS_OFF
 	l.ori	r3, r0, 0x1
-	l.sll	r3, r3, r6 	// r3 = number DMMU sets DMMUCFGR
-	l.addi	r6, r3, -1  	// r6 = nsets mask
-	l.and	r5, r5, r6	// calc offset:	 & (NUM_TLB_ENTRIES-1)
+	l.sll	r3, r3, r2 	// r3 = number DMMU sets DMMUCFGR
+	l.addi	r2, r3, -1  	// r2 = nsets mask
+	l.mfspr	r3, r0, SPR_EEAR_BASE
+	l.srli	r3, r3, 0xd	// >> PAGE_SHIFT
+	l.and	r2, r3, r2	// calc offset:	 & (NUM_TLB_ENTRIES-1)
 	                                                   //NUM_TLB_ENTRIES
-	l.mtspr	r5,r4,SPR_DTLBTR_BASE(0)
+	l.mtspr	r2,r4,SPR_DTLBTR_BASE(0)
 	/*
 	 * fill DTLB MR register
 	 */
-	l.mfspr	r2,r0,SPR_EEAR_BASE
-	l.addi	r3,r0,0xffffe000	// PAGE_MASK
-	l.and	r4,r2,r3		// apply PAGE_MASK to EA (__PHX__ do we really need this?)
-	l.ori	r4,r4,0x1		// set hardware valid bit: DTBL_MR entry
-	l.mtspr	r5,r4,SPR_DTLBMR_BASE(0)
+	l.slli	r3, r3, 0xd		/* << PAGE_SHIFT => EA & PAGE_MASK */
+	l.ori	r4,r3,0x1		// set hardware valid bit: DTBL_MR entry
+	l.mtspr	r2,r4,SPR_DTLBMR_BASE(0)
 
 	EXCEPTION_LOAD_GPR2
 	EXCEPTION_LOAD_GPR3
 	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
-	l.rfe
-d_pmd_bad:
-	l.nop	1
-	EXCEPTION_LOAD_GPR2
-	EXCEPTION_LOAD_GPR3
-	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
 	l.rfe
 d_pmd_none:
 d_pte_not_present:
 	EXCEPTION_LOAD_GPR2
 	EXCEPTION_LOAD_GPR3
 	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
 	EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler)
 
 /* ==============================================[ ITLB miss handler ]=== */
@@ -1072,8 +1060,6 @@ ENTRY(itlb_miss_handler)
 	EXCEPTION_STORE_GPR2
 	EXCEPTION_STORE_GPR3
 	EXCEPTION_STORE_GPR4
-	EXCEPTION_STORE_GPR5
-	EXCEPTION_STORE_GPR6
 	/*
 	 * get EA of the miss
 	 */
@@ -1083,30 +1069,19 @@ ENTRY(itlb_miss_handler)
 	 * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
 	 *
 	 */
-	GET_CURRENT_PGD(r3,r5)		// r3 is current_pgd, r5 is temp
+	GET_CURRENT_PGD(r3,r4)		// r3 is current_pgd, r5 is temp
 	l.srli	r4,r2,0x18		// >> PAGE_SHIFT + (PAGE_SHIFT - 2)
 	l.slli	r4,r4,0x2		// to get address << 2
-	l.add	r5,r4,r3		// r4 is pgd_index(daddr)
+	l.add	r3,r4,r3		// r4 is pgd_index(daddr)
 	/*
 	 * if (pmd_none(*pmd))
 	 *   goto pmd_none:
 	 */
-	tophys	(r4,r5)
+	tophys	(r4,r3)
 	l.lwz	r3,0x0(r4)		// get *pmd value
 	l.sfne	r3,r0
 	l.bnf	i_pmd_none
-	l.andi	r3,r3,0x1fff		// ~PAGE_MASK
-	/*
-	 * if (pmd_bad(*pmd))
-	 *   pmd_clear(pmd)
-	 *   goto pmd_bad:
-	 */
-
-//	l.sfeq	r3,r0			// check *pmd value
-//	l.bf	i_pmd_good
-	l.addi	r3,r0,0xffffe000	// PAGE_MASK
-//	l.j	i_pmd_bad
-//	l.sw	0x0(r4),r0		// clear pmd
+	 l.addi	r3,r0,0xffffe000	// PAGE_MASK
 
 i_pmd_good:
 	/*
@@ -1115,35 +1090,36 @@ i_pmd_good:
 	 */
 	l.lwz	r4,0x0(r4)		// get **pmd value
 	l.and	r4,r4,r3		// & PAGE_MASK
-	l.srli	r5,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
-	l.andi	r3,r5,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
+	l.srli	r2,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
+	l.andi	r3,r2,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
 	l.slli	r3,r3,0x2		// to get address << 2
 	l.add	r3,r3,r4
-	l.lwz	r2,0x0(r3)		// this is pte at last
+	l.lwz	r3,0x0(r3)		// this is pte at last
 	/*
 	 * if (!pte_present(pte))
 	 *
 	 */
-	l.andi	r4,r2,0x1
+	l.andi	r4,r3,0x1
 	l.sfne	r4,r0			// is pte present
 	l.bnf	i_pte_not_present
-	l.addi	r3,r0,0xffffe03a	// PAGE_MASK | ITLB_UP_CONVERT_MASK
+	 l.addi	r4,r0,0xffffe03a	// PAGE_MASK | ITLB_UP_CONVERT_MASK
 	/*
 	 * fill ITLB TR register
 	 */
-	l.and	r4,r2,r3		// apply the mask
-	l.andi	r3,r2,0x7c0		// _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE |  _PAGE_URE | _PAGE_UWE
-//	l.andi	r3,r2,0x400		// _PAGE_EXEC
+	l.and	r4,r3,r4		// apply the mask
+	l.andi	r3,r3,0x7c0		// _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE |  _PAGE_URE | _PAGE_UWE
 	l.sfeq	r3,r0
 	l.bf	itlb_tr_fill //_workaround
 	// Determine number of IMMU sets
-	l.mfspr r6, r0, SPR_IMMUCFGR
-	l.andi	r6, r6, SPR_IMMUCFGR_NTS
-	l.srli	r6, r6, SPR_IMMUCFGR_NTS_OFF
+	l.mfspr r2, r0, SPR_IMMUCFGR
+	l.andi	r2, r2, SPR_IMMUCFGR_NTS
+	l.srli	r2, r2, SPR_IMMUCFGR_NTS_OFF
 	l.ori	r3, r0, 0x1
-	l.sll	r3, r3, r6 	// r3 = number IMMU sets IMMUCFGR
-	l.addi	r6, r3, -1  	// r6 = nsets mask
-	l.and	r5, r5, r6	// calc offset:	 & (NUM_TLB_ENTRIES-1)
+	l.sll	r3, r3, r2 	// r3 = number IMMU sets IMMUCFGR
+	l.addi	r2, r3, -1  	// r2 = nsets mask
+	l.mfspr	r3, r0, SPR_EEAR_BASE
+	l.srli	r3, r3, 0xd	// >> PAGE_SHIFT
+	l.and	r2, r3, r2	// calc offset:	 & (NUM_TLB_ENTRIES-1)
 
 /*
  * __PHX__ :: fixme
@@ -1155,38 +1131,24 @@ i_pmd_good:
 itlb_tr_fill_workaround:
 	l.ori	r4,r4,0xc0		// | (SPR_ITLBTR_UXE | ITLBTR_SXE)
 itlb_tr_fill:
-	l.mtspr	r5,r4,SPR_ITLBTR_BASE(0)
+	l.mtspr	r2,r4,SPR_ITLBTR_BASE(0)
 	/*
 	 * fill DTLB MR register
 	 */
-	l.mfspr	r2,r0,SPR_EEAR_BASE
-	l.addi	r3,r0,0xffffe000	// PAGE_MASK
-	l.and	r4,r2,r3		// apply PAGE_MASK to EA (__PHX__ do we really need this?)
-	l.ori	r4,r4,0x1		// set hardware valid bit: DTBL_MR entry
-	l.mtspr	r5,r4,SPR_ITLBMR_BASE(0)
+	l.slli	r3, r3, 0xd		/* << PAGE_SHIFT => EA & PAGE_MASK */
+	l.ori	r4,r3,0x1		// set hardware valid bit: ITBL_MR entry
+	l.mtspr	r2,r4,SPR_ITLBMR_BASE(0)
 
 	EXCEPTION_LOAD_GPR2
 	EXCEPTION_LOAD_GPR3
 	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
 	l.rfe
 
-i_pmd_bad:
-	l.nop	1
-	EXCEPTION_LOAD_GPR2
-	EXCEPTION_LOAD_GPR3
-	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
-	l.rfe
 i_pmd_none:
 i_pte_not_present:
 	EXCEPTION_LOAD_GPR2
 	EXCEPTION_LOAD_GPR3
 	EXCEPTION_LOAD_GPR4
-	EXCEPTION_LOAD_GPR5
-	EXCEPTION_LOAD_GPR6
 	EXCEPTION_HANDLE(_itlb_miss_page_fault_handler)
 
 /* ==============================================[ boot tlb handlers ]=== */
@@ -1571,12 +1533,7 @@ ENTRY(_early_uart_init)
 	l.jr	r9
 	l.nop
 
-_string_copying_linux:
-	.string "\n\n\n\n\n\rCopying Linux... \0"
-
-_string_ok_booting:
-	.string "Ok, booting the kernel.\n\r\0"
-
+	.section .rodata
 _string_unhandled_exception:
 	.string "\n\rRunarunaround: Unhandled exception 0x\0"
 
@@ -1586,11 +1543,6 @@ _string_epc_prefix:
 _string_nl:
 	.string "\n\r\0"
 
-	.global	_string_esr_irq_bug
-_string_esr_irq_bug:
-	.string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0"
-
-
 
 /* ========================================[ page aligned structures ]=== */
 

+ 1 - 0
arch/openrisc/kernel/or32_ksyms.c

@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
 DECLARE_EXPORT(__lshrdi3);
 
 EXPORT_SYMBOL(__copy_tofrom_user);
+EXPORT_SYMBOL(memset);

+ 14 - 0
arch/openrisc/kernel/process.c

@@ -75,6 +75,17 @@ void machine_power_off(void)
 	__asm__("l.nop 1");
 }
 
+/*
+ * Send the doze signal to the cpu if available.
+ * Make sure, that all interrupts are enabled
+ */
+void arch_cpu_idle(void)
+{
+	local_irq_enable();
+	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
+		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
+}
+
 void (*pm_power_off) (void) = machine_power_off;
 
 /*
@@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 
 extern struct thread_info *_switch(struct thread_info *old_ti,
 				   struct thread_info *new_ti);
+extern int lwa_flag;
 
 struct task_struct *__switch_to(struct task_struct *old,
 				struct task_struct *new)
@@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old,
 	new_ti = new->stack;
 	old_ti = old->stack;
 
+	lwa_flag = 0;
+
 	current_thread_info_set[smp_processor_id()] = new_ti;
 	last = (_switch(old_ti, new_ti))->task;
 

+ 0 - 1
arch/openrisc/kernel/ptrace.c

@@ -16,7 +16,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <stddef.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/string.h>

+ 36 - 31
arch/openrisc/kernel/setup.c

@@ -117,13 +117,15 @@ static void print_cpuinfo(void)
 	if (upr & SPR_UPR_DCP)
 		printk(KERN_INFO
 		       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1);
+		       cpuinfo.dcache_size, cpuinfo.dcache_block_size,
+		       cpuinfo.dcache_ways);
 	else
 		printk(KERN_INFO "-- dcache disabled\n");
 	if (upr & SPR_UPR_ICP)
 		printk(KERN_INFO
 		       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo.icache_size, cpuinfo.icache_block_size, 1);
+		       cpuinfo.icache_size, cpuinfo.icache_block_size,
+		       cpuinfo.icache_ways);
 	else
 		printk(KERN_INFO "-- icache disabled\n");
 
@@ -155,25 +157,25 @@ void __init setup_cpuinfo(void)
 {
 	struct device_node *cpu;
 	unsigned long iccfgr, dccfgr;
-	unsigned long cache_set_size, cache_ways;
+	unsigned long cache_set_size;
 
 	cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
 	if (!cpu)
 		panic("No compatible CPU found in device tree...\n");
 
 	iccfgr = mfspr(SPR_ICCFGR);
-	cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
+	cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
 	cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
 	cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
 	cpuinfo.icache_size =
-	    cache_set_size * cache_ways * cpuinfo.icache_block_size;
+	    cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size;
 
 	dccfgr = mfspr(SPR_DCCFGR);
-	cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
+	cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
 	cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
 	cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
 	cpuinfo.dcache_size =
-	    cache_set_size * cache_ways * cpuinfo.dcache_block_size;
+	    cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size;
 
 	if (of_property_read_u32(cpu, "clock-frequency",
 				 &cpuinfo.clock_frequency)) {
@@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 	revision = vr & SPR_VR_REV;
 
 	seq_printf(m,
-		   "cpu\t\t: OpenRISC-%x\n"
-		   "revision\t: %d\n"
-		   "frequency\t: %ld\n"
-		   "dcache size\t: %d bytes\n"
-		   "dcache block size\t: %d bytes\n"
-		   "icache size\t: %d bytes\n"
-		   "icache block size\t: %d bytes\n"
-		   "immu\t\t: %d entries, %lu ways\n"
-		   "dmmu\t\t: %d entries, %lu ways\n"
-		   "bogomips\t: %lu.%02lu\n",
-		   version,
-		   revision,
-		   loops_per_jiffy * HZ,
-		   cpuinfo.dcache_size,
-		   cpuinfo.dcache_block_size,
-		   cpuinfo.icache_size,
-		   cpuinfo.icache_block_size,
-		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
-		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
-		   1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
-		   1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
-		   (loops_per_jiffy * HZ) / 500000,
-		   ((loops_per_jiffy * HZ) / 5000) % 100);
-
+		  "cpu\t\t: OpenRISC-%x\n"
+		  "revision\t: %d\n"
+		  "frequency\t: %ld\n"
+		  "dcache size\t: %d bytes\n"
+		  "dcache block size\t: %d bytes\n"
+		  "dcache ways\t: %d\n"
+		  "icache size\t: %d bytes\n"
+		  "icache block size\t: %d bytes\n"
+		  "icache ways\t: %d\n"
+		  "immu\t\t: %d entries, %lu ways\n"
+		  "dmmu\t\t: %d entries, %lu ways\n"
+		  "bogomips\t: %lu.%02lu\n",
+		  version,
+		  revision,
+		  loops_per_jiffy * HZ,
+		  cpuinfo.dcache_size,
+		  cpuinfo.dcache_block_size,
+		  cpuinfo.dcache_ways,
+		  cpuinfo.icache_size,
+		  cpuinfo.icache_block_size,
+		  cpuinfo.icache_ways,
+		  1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
+		  1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
+		  1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
+		  1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
+		  (loops_per_jiffy * HZ) / 500000,
+		  ((loops_per_jiffy * HZ) / 5000) % 100);
 	return 0;
 }
 

+ 183 - 0
arch/openrisc/kernel/traps.c

@@ -40,6 +40,8 @@
 extern char _etext, _stext;
 
 int kstack_depth_to_print = 0x180;
+int lwa_flag;
+unsigned long __user *lwa_addr;
 
 static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 {
@@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
 	}
 }
 
+static inline int in_delay_slot(struct pt_regs *regs)
+{
+#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
+	/* No delay slot flag, do the old way */
+	unsigned int op, insn;
+
+	insn = *((unsigned int *)regs->pc);
+	op = insn >> 26;
+	switch (op) {
+	case 0x00: /* l.j */
+	case 0x01: /* l.jal */
+	case 0x03: /* l.bnf */
+	case 0x04: /* l.bf */
+	case 0x11: /* l.jr */
+	case 0x12: /* l.jalr */
+		return 1;
+	default:
+		return 0;
+	}
+#else
+	return regs->sr & SPR_SR_DSX;
+#endif
+}
+
+static inline void adjust_pc(struct pt_regs *regs, unsigned long address)
+{
+	int displacement;
+	unsigned int rb, op, jmp;
+
+	if (unlikely(in_delay_slot(regs))) {
+		/* In delay slot, instruction at pc is a branch, simulate it */
+		jmp = *((unsigned int *)regs->pc);
+
+		displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27);
+		rb = (jmp & 0x0000ffff) >> 11;
+		op = jmp >> 26;
+
+		switch (op) {
+		case 0x00: /* l.j */
+			regs->pc += displacement;
+			return;
+		case 0x01: /* l.jal */
+			regs->pc += displacement;
+			regs->gpr[9] = regs->pc + 8;
+			return;
+		case 0x03: /* l.bnf */
+			if (regs->sr & SPR_SR_F)
+				regs->pc += 8;
+			else
+				regs->pc += displacement;
+			return;
+		case 0x04: /* l.bf */
+			if (regs->sr & SPR_SR_F)
+				regs->pc += displacement;
+			else
+				regs->pc += 8;
+			return;
+		case 0x11: /* l.jr */
+			regs->pc = regs->gpr[rb];
+			return;
+		case 0x12: /* l.jalr */
+			regs->pc = regs->gpr[rb];
+			regs->gpr[9] = regs->pc + 8;
+			return;
+		default:
+			break;
+		}
+	} else {
+		regs->pc += 4;
+	}
+}
+
+static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
+				unsigned int insn)
+{
+	unsigned int ra, rd;
+	unsigned long value;
+	unsigned long orig_pc;
+	long imm;
+
+	const struct exception_table_entry *entry;
+
+	orig_pc = regs->pc;
+	adjust_pc(regs, address);
+
+	ra = (insn >> 16) & 0x1f;
+	rd = (insn >> 21) & 0x1f;
+	imm = (short)insn;
+	lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+	if ((unsigned long)lwa_addr & 0x3) {
+		do_unaligned_access(regs, address);
+		return;
+	}
+
+	if (get_user(value, lwa_addr)) {
+		if (user_mode(regs)) {
+			force_sig(SIGSEGV, current);
+			return;
+		}
+
+		if ((entry = search_exception_tables(orig_pc))) {
+			regs->pc = entry->fixup;
+			return;
+		}
+
+		/* kernel access in kernel space, load it directly */
+		value = *((unsigned long *)lwa_addr);
+	}
+
+	lwa_flag = 1;
+	regs->gpr[rd] = value;
+}
+
+static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
+				unsigned int insn)
+{
+	unsigned long __user *vaddr;
+	unsigned long orig_pc;
+	unsigned int ra, rb;
+	long imm;
+
+	const struct exception_table_entry *entry;
+
+	orig_pc = regs->pc;
+	adjust_pc(regs, address);
+
+	ra = (insn >> 16) & 0x1f;
+	rb = (insn >> 11) & 0x1f;
+	imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff));
+	vaddr = (unsigned long __user *)(regs->gpr[ra] + imm);
+
+	if (!lwa_flag || vaddr != lwa_addr) {
+		regs->sr &= ~SPR_SR_F;
+		return;
+	}
+
+	if ((unsigned long)vaddr & 0x3) {
+		do_unaligned_access(regs, address);
+		return;
+	}
+
+	if (put_user(regs->gpr[rb], vaddr)) {
+		if (user_mode(regs)) {
+			force_sig(SIGSEGV, current);
+			return;
+		}
+
+		if ((entry = search_exception_tables(orig_pc))) {
+			regs->pc = entry->fixup;
+			return;
+		}
+
+		/* kernel access in kernel space, store it directly */
+		*((unsigned long *)vaddr) = regs->gpr[rb];
+	}
+
+	lwa_flag = 0;
+	regs->sr |= SPR_SR_F;
+}
+
+#define INSN_LWA	0x1b
+#define INSN_SWA	0x33
+
 asmlinkage void do_illegal_instruction(struct pt_regs *regs,
 				       unsigned long address)
 {
 	siginfo_t info;
+	unsigned int op;
+	unsigned int insn = *((unsigned int *)address);
+
+	op = insn >> 26;
+
+	switch (op) {
+	case INSN_LWA:
+		simulate_lwa(regs, address, insn);
+		return;
+
+	case INSN_SWA:
+		simulate_swa(regs, address, insn);
+		return;
+
+	default:
+		break;
+	}
 
 	if (user_mode(regs)) {
 		/* Send a SIGILL */

+ 1 - 1
arch/openrisc/lib/Makefile

@@ -2,4 +2,4 @@
 # Makefile for or32 specific library files..
 #
 
-obj-y  = string.o delay.o
+obj-y	:= delay.o string.o memset.o memcpy.o

+ 124 - 0
arch/openrisc/lib/memcpy.c

@@ -0,0 +1,124 @@
+/*
+ * arch/openrisc/lib/memcpy.c
+ *
+ * Optimized memory copy routines for openrisc.  These are mostly copied
+ * from other sources but slightly extended based on ideas discussed in
+ * #openrisc.
+ *
+ * The word unroll implementation is an extension to the arm byte
+ * unrolled implementation, but using word copies (if things are
+ * properly aligned)
+ *
+ * The great arm loop unroll algorithm can be found at:
+ *  arch/arm/boot/compressed/string.c
+ */
+
+#include <linux/export.h>
+
+#include <linux/string.h>
+
+#ifdef CONFIG_OR1K_1200
+/*
+ * Do memcpy with word copies and loop unrolling. This gives the
+ * best performance on the OR1200 and MOR1KX architectures
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+	int i = 0;
+	unsigned char *d, *s;
+	uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+	/* If both source and dest are word aligned copy words */
+	if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+		/* Copy 32 bytes per loop */
+		for (i = n >> 5; i > 0; i--) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 4) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 3) {
+			*dest_w++ = *src_w++;
+			*dest_w++ = *src_w++;
+		}
+
+		if (n & 1 << 2)
+			*dest_w++ = *src_w++;
+
+		d = (unsigned char *)dest_w;
+		s = (unsigned char *)src_w;
+
+	} else {
+		d = (unsigned char *)dest_w;
+		s = (unsigned char *)src_w;
+
+		for (i = n >> 3; i > 0; i--) {
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+		}
+
+		if (n & 1 << 2) {
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+			*d++ = *s++;
+		}
+	}
+
+	if (n & 1 << 1) {
+		*d++ = *s++;
+		*d++ = *s++;
+	}
+
+	if (n & 1)
+		*d++ = *s++;
+
+	return dest;
+}
+#else
+/*
+ * Use word copies but no loop unrolling as we cannot assume there
+ * will be benefits on the architecture
+ */
+void *memcpy(void *dest, __const void *src, __kernel_size_t n)
+{
+	unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src;
+	uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
+
+	/* If both source and dest are word aligned copy words */
+	if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
+		for (; n >= 4; n -= 4)
+			*dest_w++ = *src_w++;
+	}
+
+	d = (unsigned char *)dest_w;
+	s = (unsigned char *)src_w;
+
+	/* For remaining or if not aligned, copy bytes */
+	for (; n >= 1; n -= 1)
+		*d++ = *s++;
+
+	return dest;
+
+}
+#endif
+
+EXPORT_SYMBOL(memcpy);

+ 98 - 0
arch/openrisc/lib/memset.S

@@ -0,0 +1,98 @@
+/*
+ * OpenRISC memset.S
+ *
+ * Hand-optimized assembler version of memset for OpenRISC.
+ * Algorithm inspired by several other arch-specific memset routines
+ * in the kernel tree
+ *
+ * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+	.global memset
+	.type	memset, @function
+memset:
+	/* arguments:
+	 * r3 = *s
+	 * r4 = c
+	 * r5 = n
+	 * r13, r15, r17, r19 used as temp regs
+	*/
+
+	/* Exit if n == 0 */
+	l.sfeqi		r5, 0
+	l.bf		4f
+
+	/* Truncate c to char */
+	l.andi  	r13, r4, 0xff
+
+	/* Skip word extension if c is 0 */
+	l.sfeqi		r13, 0
+	l.bf		1f
+	/* Check for at least two whole words (8 bytes) */
+	 l.sfleui	r5, 7
+
+	/* Extend char c to 32-bit word cccc in r13 */
+	l.slli		r15, r13, 16  // r13 = 000c, r15 = 0c00
+	l.or		r13, r13, r15 // r13 = 0c0c, r15 = 0c00
+	l.slli		r15, r13, 8   // r13 = 0c0c, r15 = c0c0
+	l.or		r13, r13, r15 // r13 = cccc, r15 = c0c0
+
+1:	l.addi		r19, r3, 0 // Set r19 = src
+	/* Jump to byte copy loop if less than two words */
+	l.bf		3f
+	 l.or		r17, r5, r0 // Set r17 = n
+
+	/* Mask out two LSBs to check alignment */
+	l.andi		r15, r3, 0x3
+
+	/* lsb == 00, jump to word copy loop */
+	l.sfeqi		r15, 0
+	l.bf		2f
+	 l.addi		r19, r3, 0 // Set r19 = src
+
+	/* lsb == 01,10 or 11 */
+	l.sb		0(r3), r13   // *src = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 3
+	l.bf		2f
+	 l.addi		r19, r3, 1  // src += 1
+
+	/* lsb == 01 or 10 */
+	l.sb		1(r3), r13   // *(src+1) = c
+	l.addi		r17, r17, -1 // Decrease n
+
+	l.sfeqi		r15, 2
+	l.bf		2f
+	 l.addi		r19, r3, 2  // src += 2
+
+	/* lsb == 01 */
+	l.sb		2(r3), r13   // *(src+2) = c
+	l.addi		r17, r17, -1 // Decrease n
+	l.addi		r19, r3, 3   // src += 3
+
+	/* Word copy loop */
+2:	l.sw		0(r19), r13  // *src = cccc
+	l.addi		r17, r17, -4 // Decrease n
+	l.sfgeui	r17, 4
+	l.bf		2b
+	 l.addi		r19, r19, 4  // Increase src
+
+	/* When n > 0, copy the remaining bytes, otherwise jump to exit */
+	l.sfeqi		r17, 0
+	l.bf		4f
+
+	/* Byte copy loop */
+3:	l.addi		r17, r17, -1 // Decrease n
+	l.sb		0(r19), r13  // *src = cccc
+	l.sfnei		r17, 0
+	l.bf		3b
+	 l.addi		r19, r19, 1  // Increase src
+
+4:	l.jr		r9
+	 l.ori		r11, r3, 0

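The l.slli/l.or sequence in memset above replicates the fill byte across a 32-bit word before entering the word-store loop. In C the same step is roughly the following (shown for illustration only, not part of the commit):

#include <stdint.h>

static uint32_t replicate_fill_byte(uint8_t c)
{
	uint32_t w = c;		/* 0x000000cc              */

	w |= w << 16;		/* 0x000000cc -> 0x00cc00cc */
	w |= w << 8;		/* 0x00cc00cc -> 0xcccccccc */
	return w;
}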
+ 2 - 0
arch/openrisc/mm/ioremap.c

@@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot)
 
 	return (void __iomem *)(offset + (char *)v);
 }
+EXPORT_SYMBOL(__ioremap);
 
 void iounmap(void *addr)
 {
@@ -106,6 +107,7 @@ void iounmap(void *addr)
 
 	return vfree((void *)(PAGE_MASK & (unsigned long)addr));
 }
+EXPORT_SYMBOL(iounmap);
 
 /**
  * OK, this one's a bit tricky... ioremap can get called before memory is

+ 2 - 0
include/asm-generic/atomic.h

@@ -223,6 +223,7 @@ static inline void atomic_dec(atomic_t *v)
 #define atomic_xchg(ptr, v)		(xchg(&(ptr)->counter, (v)))
 #define atomic_cmpxchg(v, old, new)	(cmpxchg(&((v)->counter), (old), (new)))
 
+#ifndef __atomic_add_unless
 static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 {
 	int c, old;
@@ -231,5 +232,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 		c = old;
 	return c;
 }
+#endif
 
 #endif /* __ASM_GENERIC_ATOMIC_H */

+ 3 - 0
scripts/checkstack.pl

@@ -81,6 +81,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
 	} elsif ($arch eq 'nios2') {
 		#25a8:	defffb04 	addi	sp,sp,-20
 		$re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
+	} elsif ($arch eq 'openrisc') {
+		# c000043c:       9c 21 fe f0     l.addi r1,r1,-272
+		$re = qr/.*l\.addi.*r1,r1,-(([0-9]{2}|[3-9])[0-9]{2})/o;
 	} elsif ($arch eq 'parisc' || $arch eq 'parisc64') {
 		$re = qr/.*ldo ($x{1,8})\(sp\),sp/o;
 	} elsif ($arch eq 'ppc') {
 	} elsif ($arch eq 'ppc') {