|
@@ -76,10 +76,10 @@
|
|
|
LOAD _t1, (offset + UNIT(1))(src); \
|
|
|
LOAD _t2, (offset + UNIT(2))(src); \
|
|
|
LOAD _t3, (offset + UNIT(3))(src); \
|
|
|
+ ADDC(_t0, _t1); \
|
|
|
+ ADDC(_t2, _t3); \
|
|
|
ADDC(sum, _t0); \
|
|
|
- ADDC(sum, _t1); \
|
|
|
- ADDC(sum, _t2); \
|
|
|
- ADDC(sum, _t3)
|
|
|
+ ADDC(sum, _t2)
|
|
|
|
|
|
#ifdef USE_DOUBLE
|
|
|
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3) \
|
|
@@ -504,21 +504,21 @@ LEAF(csum_partial)
|
|
|
SUB len, len, 8*NBYTES
|
|
|
ADD src, src, 8*NBYTES
|
|
|
STORE(t0, UNIT(0)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t0)
|
|
|
+ ADDC(t0, t1)
|
|
|
STORE(t1, UNIT(1)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t1)
|
|
|
+ ADDC(sum, t0)
|
|
|
STORE(t2, UNIT(2)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t2)
|
|
|
+ ADDC(t2, t3)
|
|
|
STORE(t3, UNIT(3)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t3)
|
|
|
+ ADDC(sum, t2)
|
|
|
STORE(t4, UNIT(4)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t4)
|
|
|
+ ADDC(t4, t5)
|
|
|
STORE(t5, UNIT(5)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t5)
|
|
|
+ ADDC(sum, t4)
|
|
|
STORE(t6, UNIT(6)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t6)
|
|
|
+ ADDC(t6, t7)
|
|
|
STORE(t7, UNIT(7)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t7)
|
|
|
+ ADDC(sum, t6)
|
|
|
.set reorder /* DADDI_WAR */
|
|
|
ADD dst, dst, 8*NBYTES
|
|
|
bgez len, 1b
|
|
@@ -544,13 +544,13 @@ LEAF(csum_partial)
|
|
|
SUB len, len, 4*NBYTES
|
|
|
ADD src, src, 4*NBYTES
|
|
|
STORE(t0, UNIT(0)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t0)
|
|
|
+ ADDC(t0, t1)
|
|
|
STORE(t1, UNIT(1)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t1)
|
|
|
+ ADDC(sum, t0)
|
|
|
STORE(t2, UNIT(2)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t2)
|
|
|
+ ADDC(t2, t3)
|
|
|
STORE(t3, UNIT(3)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t3)
|
|
|
+ ADDC(sum, t2)
|
|
|
.set reorder /* DADDI_WAR */
|
|
|
ADD dst, dst, 4*NBYTES
|
|
|
beqz len, .Ldone\@
|
|
@@ -649,13 +649,13 @@ LEAF(csum_partial)
|
|
|
nop # improves slotting
|
|
|
#endif
|
|
|
STORE(t0, UNIT(0)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t0)
|
|
|
+ ADDC(t0, t1)
|
|
|
STORE(t1, UNIT(1)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t1)
|
|
|
+ ADDC(sum, t0)
|
|
|
STORE(t2, UNIT(2)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t2)
|
|
|
+ ADDC(t2, t3)
|
|
|
STORE(t3, UNIT(3)(dst), .Ls_exc\@)
|
|
|
- ADDC(sum, t3)
|
|
|
+ ADDC(sum, t2)
|
|
|
.set reorder /* DADDI_WAR */
|
|
|
ADD dst, dst, 4*NBYTES
|
|
|
bne len, rem, 1b
|