Эх сурвалжийг харах

powerpc/32: Few optimisations in memcpy

This patch adds a few optimisations to the memcpy functions by using
lbzu/stbu instead of lbz/stb, and by reordering instructions inside a loop
to reduce the latency caused by loads.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
Signed-off-by: Scott Wood <scottwood@freescale.com>
LEROY Christophe 10 жил өмнө
parent
commit
295ffb4189

+ 5 - 5
arch/powerpc/lib/copy_32.S

@@ -155,9 +155,9 @@ _GLOBAL(memcpy)
 	mtctr	r8
 	beq+	61f
 70:	lbz	r9,4(r4)		/* do some bytes */
-	stb	r9,4(r6)
 	addi	r4,r4,1
 	addi	r6,r6,1
+	stb	r9,3(r6)
 	bdnz	70b
 61:	srwi.	r0,r0,2
 	mtctr	r0
@@ -199,10 +199,10 @@ _GLOBAL(memcpy)
 64:	andi.	r0,r5,3
 	mtctr	r0
 	beq+	65f
-40:	lbz	r0,4(r4)
-	stb	r0,4(r6)
-	addi	r4,r4,1
-	addi	r6,r6,1
+	addi	r4,r4,3
+	addi	r6,r6,3
+40:	lbzu	r0,1(r4)
+	stbu	r0,1(r6)
 	bdnz	40b
 65:	blr