|
@@ -74,9 +74,9 @@ _GLOBAL(__csum_partial)
|
|
ld r11,24(r3)
|
|
ld r11,24(r3)
|
|
|
|
|
|
/*
|
|
/*
|
|
- * On POWER6 and POWER7 back to back addes take 2 cycles because of
|
|
|
|
- * the XER dependency. This means the fastest this loop can go is
|
|
|
|
- * 16 cycles per iteration. The scheduling of the loop below has
|
|
|
|
|
|
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
|
|
|
|
+ * because of the XER dependency. This means the fastest this loop can
|
|
|
|
+ * go is 16 cycles per iteration. The scheduling of the loop below has
|
|
* been shown to hit this on both POWER6 and POWER7.
|
|
* been shown to hit this on both POWER6 and POWER7.
|
|
*/
|
|
*/
|
|
.align 5
|
|
.align 5
|
|
@@ -275,9 +275,9 @@ source; ld r10,16(r3)
|
|
source; ld r11,24(r3)
|
|
source; ld r11,24(r3)
|
|
|
|
|
|
/*
|
|
/*
|
|
- * On POWER6 and POWER7 back to back addes take 2 cycles because of
|
|
|
|
- * the XER dependency. This means the fastest this loop can go is
|
|
|
|
- * 16 cycles per iteration. The scheduling of the loop below has
|
|
|
|
|
|
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
|
|
|
|
+ * because of the XER dependency. This means the fastest this loop can
|
|
|
|
+ * go is 16 cycles per iteration. The scheduling of the loop below has
|
|
* been shown to hit this on both POWER6 and POWER7.
|
|
* been shown to hit this on both POWER6 and POWER7.
|
|
*/
|
|
*/
|
|
.align 5
|
|
.align 5
|