@@ -315,13 +315,13 @@ err1;	stb	r0,0(r3)
 	.machine push
 	.machine "power4"
 	/* setup read stream 0 */
-	dcbt	r0,r6,0b01000	/* addr from */
-	dcbt	r0,r7,0b01010	/* length and depth from */
+	dcbt	0,r6,0b01000	/* addr from */
+	dcbt	0,r7,0b01010	/* length and depth from */
 	/* setup write stream 1 */
-	dcbtst	r0,r9,0b01000	/* addr to */
-	dcbtst	r0,r10,0b01010	/* length and depth to */
+	dcbtst	0,r9,0b01000	/* addr to */
+	dcbtst	0,r10,0b01010	/* length and depth to */
 	eieio
-	dcbt	r0,r8,0b01010	/* all streams GO */
+	dcbt	0,r8,0b01010	/* all streams GO */
 	.machine pop
 
 	beq	cr1,.Lunwind_stack_nonvmx_copy
@@ -376,26 +376,26 @@ err3;	std	r0,0(r3)
 	li	r11,48
 
 	bf	cr7*4+3,5f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	addi	r4,r4,16
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 	addi	r3,r3,16
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 err3;	lvx	v2,r4,r9
 err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	v3,r0,r3
+err3;	stvx	v3,0,r3
 err3;	stvx	v2,r3,r9
 err3;	stvx	v1,r3,r10
 err3;	stvx	v0,r3,r11
@@ -421,7 +421,7 @@ err3;	stvx	v0,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	v7,r0,r4
+err4;	lvx	v7,0,r4
 err4;	lvx	v6,r4,r9
 err4;	lvx	v5,r4,r10
 err4;	lvx	v4,r4,r11
@@ -430,7 +430,7 @@ err4;	lvx	v2,r4,r14
 err4;	lvx	v1,r4,r15
 err4;	lvx	v0,r4,r16
 	addi	r4,r4,128
-err4;	stvx	v7,r0,r3
+err4;	stvx	v7,0,r3
 err4;	stvx	v6,r3,r9
 err4;	stvx	v5,r3,r10
 err4;	stvx	v4,r3,r11
@@ -451,29 +451,29 @@ err4;	stvx	v0,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 err3;	lvx	v2,r4,r9
 err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	addi	r4,r4,64
-err3;	stvx	v3,r0,r3
+err3;	stvx	v3,0,r3
 err3;	stvx	v2,r3,r9
 err3;	stvx	v1,r3,r10
 err3;	stvx	v0,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 err3;	lvx	v0,r4,r9
 	addi	r4,r4,32
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 err3;	stvx	v0,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	addi	r4,r4,16
-err3;	stvx	v1,r0,r3
+err3;	stvx	v1,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */
@@ -553,25 +553,25 @@ err3;	lvx	v0,0,r4
 	addi	r4,r4,16
 
 	bf	cr7*4+3,5f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 	addi	r3,r3,16
 	vor	v0,v1,v1
 
 5:	bf	cr7*4+2,6f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 err3;	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 6:	bf	cr7*4+1,7f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 err3;	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -580,7 +580,7 @@ err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 err3;	stvx	v10,r3,r10
 err3;	stvx	v11,r3,r11
@@ -606,7 +606,7 @@ err3;	stvx	v11,r3,r11
 	 */
 	.align	5
 8:
-err4;	lvx	v7,r0,r4
+err4;	lvx	v7,0,r4
 	VPERM(v8,v0,v7,v16)
 err4;	lvx	v6,r4,r9
 	VPERM(v9,v7,v6,v16)
@@ -623,7 +623,7 @@ err4;	lvx	v1,r4,r15
 err4;	lvx	v0,r4,r16
 	VPERM(v15,v1,v0,v16)
 	addi	r4,r4,128
-err4;	stvx	v8,r0,r3
+err4;	stvx	v8,0,r3
 err4;	stvx	v9,r3,r9
 err4;	stvx	v10,r3,r10
 err4;	stvx	v11,r3,r11
@@ -644,7 +644,7 @@ err4;	stvx	v15,r3,r16
 	mtocrf	0x01,r6
 
 	bf	cr7*4+1,9f
-err3;	lvx	v3,r0,r4
+err3;	lvx	v3,0,r4
 	VPERM(v8,v0,v3,v16)
 err3;	lvx	v2,r4,r9
 	VPERM(v9,v3,v2,v16)
@@ -653,27 +653,27 @@ err3;	lvx	v1,r4,r10
 err3;	lvx	v0,r4,r11
 	VPERM(v11,v1,v0,v16)
 	addi	r4,r4,64
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 err3;	stvx	v10,r3,r10
 err3;	stvx	v11,r3,r11
 	addi	r3,r3,64
 
 9:	bf	cr7*4+2,10f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 err3;	lvx	v0,r4,r9
 	VPERM(v9,v1,v0,v16)
 	addi	r4,r4,32
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 err3;	stvx	v9,r3,r9
 	addi	r3,r3,32
 
 10:	bf	cr7*4+3,11f
-err3;	lvx	v1,r0,r4
+err3;	lvx	v1,0,r4
 	VPERM(v8,v0,v1,v16)
 	addi	r4,r4,16
-err3;	stvx	v8,r0,r3
+err3;	stvx	v8,0,r3
 	addi	r3,r3,16
 
 	/* Up to 15B to go */