|
@@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
|
vmovdqu y6, 14 * 32(rio); \
|
|
vmovdqu y6, 14 * 32(rio); \
|
|
vmovdqu y7, 15 * 32(rio);
|
|
vmovdqu y7, 15 * 32(rio);
|
|
|
|
|
|
-.data
|
|
|
|
-.align 32
|
|
|
|
|
|
|
|
|
|
+.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32
|
|
|
|
+.align 32
|
|
#define SHUFB_BYTES(idx) \
|
|
#define SHUFB_BYTES(idx) \
|
|
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
|
|
0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
|
|
-
|
|
|
|
.Lshufb_16x16b:
|
|
.Lshufb_16x16b:
|
|
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
|
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
|
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
|
.byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
|
|
|
|
|
|
|
|
+.section .rodata.cst32.pack_bswap, "aM", @progbits, 32
|
|
|
|
+.align 32
|
|
.Lpack_bswap:
|
|
.Lpack_bswap:
|
|
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
|
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
|
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
|
.long 0x00010203, 0x04050607, 0x80808080, 0x80808080
|
|
|
|
|
|
|
|
+/* NB: section is mergeable, all elements must be aligned 16-byte blocks */
|
|
|
|
+.section .rodata.cst16, "aM", @progbits, 16
|
|
|
|
+.align 16
|
|
|
|
+
|
|
/* For CTR-mode IV byteswap */
|
|
/* For CTR-mode IV byteswap */
|
|
.Lbswap128_mask:
|
|
.Lbswap128_mask:
|
|
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
|
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
|
@@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
|
|
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
|
|
.byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
|
|
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
|
.byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
|
|
|
|
|
|
|
|
+.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4
|
|
.align 4
|
|
.align 4
|
|
/* 4-bit mask */
|
|
/* 4-bit mask */
|
|
.L0f0f0f0f:
|
|
.L0f0f0f0f:
|