|
@@ -169,19 +169,19 @@ ENTRY(ce_aes_ecb_encrypt)
|
|
.Lecbencloop3x:
|
|
.Lecbencloop3x:
|
|
subs r4, r4, #3
|
|
subs r4, r4, #3
|
|
bmi .Lecbenc1x
|
|
bmi .Lecbenc1x
|
|
- vld1.8 {q0-q1}, [r1, :64]!
|
|
|
|
- vld1.8 {q2}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0-q1}, [r1]!
|
|
|
|
+ vld1.8 {q2}, [r1]!
|
|
bl aes_encrypt_3x
|
|
bl aes_encrypt_3x
|
|
- vst1.8 {q0-q1}, [r0, :64]!
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]!
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
b .Lecbencloop3x
|
|
b .Lecbencloop3x
|
|
.Lecbenc1x:
|
|
.Lecbenc1x:
|
|
adds r4, r4, #3
|
|
adds r4, r4, #3
|
|
beq .Lecbencout
|
|
beq .Lecbencout
|
|
.Lecbencloop:
|
|
.Lecbencloop:
|
|
- vld1.8 {q0}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0}, [r1]!
|
|
bl aes_encrypt
|
|
bl aes_encrypt
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
bne .Lecbencloop
|
|
bne .Lecbencloop
|
|
.Lecbencout:
|
|
.Lecbencout:
|
|
@@ -195,19 +195,19 @@ ENTRY(ce_aes_ecb_decrypt)
|
|
.Lecbdecloop3x:
|
|
.Lecbdecloop3x:
|
|
subs r4, r4, #3
|
|
subs r4, r4, #3
|
|
bmi .Lecbdec1x
|
|
bmi .Lecbdec1x
|
|
- vld1.8 {q0-q1}, [r1, :64]!
|
|
|
|
- vld1.8 {q2}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0-q1}, [r1]!
|
|
|
|
+ vld1.8 {q2}, [r1]!
|
|
bl aes_decrypt_3x
|
|
bl aes_decrypt_3x
|
|
- vst1.8 {q0-q1}, [r0, :64]!
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]!
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
b .Lecbdecloop3x
|
|
b .Lecbdecloop3x
|
|
.Lecbdec1x:
|
|
.Lecbdec1x:
|
|
adds r4, r4, #3
|
|
adds r4, r4, #3
|
|
beq .Lecbdecout
|
|
beq .Lecbdecout
|
|
.Lecbdecloop:
|
|
.Lecbdecloop:
|
|
- vld1.8 {q0}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0}, [r1]!
|
|
bl aes_decrypt
|
|
bl aes_decrypt
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
bne .Lecbdecloop
|
|
bne .Lecbdecloop
|
|
.Lecbdecout:
|
|
.Lecbdecout:
|
|
@@ -226,10 +226,10 @@ ENTRY(ce_aes_cbc_encrypt)
|
|
vld1.8 {q0}, [r5]
|
|
vld1.8 {q0}, [r5]
|
|
prepare_key r2, r3
|
|
prepare_key r2, r3
|
|
.Lcbcencloop:
|
|
.Lcbcencloop:
|
|
- vld1.8 {q1}, [r1, :64]! @ get next pt block
|
|
|
|
|
|
+ vld1.8 {q1}, [r1]! @ get next pt block
|
|
veor q0, q0, q1 @ ..and xor with iv
|
|
veor q0, q0, q1 @ ..and xor with iv
|
|
bl aes_encrypt
|
|
bl aes_encrypt
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
bne .Lcbcencloop
|
|
bne .Lcbcencloop
|
|
vst1.8 {q0}, [r5]
|
|
vst1.8 {q0}, [r5]
|
|
@@ -244,8 +244,8 @@ ENTRY(ce_aes_cbc_decrypt)
|
|
.Lcbcdecloop3x:
|
|
.Lcbcdecloop3x:
|
|
subs r4, r4, #3
|
|
subs r4, r4, #3
|
|
bmi .Lcbcdec1x
|
|
bmi .Lcbcdec1x
|
|
- vld1.8 {q0-q1}, [r1, :64]!
|
|
|
|
- vld1.8 {q2}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0-q1}, [r1]!
|
|
|
|
+ vld1.8 {q2}, [r1]!
|
|
vmov q3, q0
|
|
vmov q3, q0
|
|
vmov q4, q1
|
|
vmov q4, q1
|
|
vmov q5, q2
|
|
vmov q5, q2
|
|
@@ -254,19 +254,19 @@ ENTRY(ce_aes_cbc_decrypt)
|
|
veor q1, q1, q3
|
|
veor q1, q1, q3
|
|
veor q2, q2, q4
|
|
veor q2, q2, q4
|
|
vmov q6, q5
|
|
vmov q6, q5
|
|
- vst1.8 {q0-q1}, [r0, :64]!
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]!
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
b .Lcbcdecloop3x
|
|
b .Lcbcdecloop3x
|
|
.Lcbcdec1x:
|
|
.Lcbcdec1x:
|
|
adds r4, r4, #3
|
|
adds r4, r4, #3
|
|
beq .Lcbcdecout
|
|
beq .Lcbcdecout
|
|
vmov q15, q14 @ preserve last round key
|
|
vmov q15, q14 @ preserve last round key
|
|
.Lcbcdecloop:
|
|
.Lcbcdecloop:
|
|
- vld1.8 {q0}, [r1, :64]! @ get next ct block
|
|
|
|
|
|
+ vld1.8 {q0}, [r1]! @ get next ct block
|
|
veor q14, q15, q6 @ combine prev ct with last key
|
|
veor q14, q15, q6 @ combine prev ct with last key
|
|
vmov q6, q0
|
|
vmov q6, q0
|
|
bl aes_decrypt
|
|
bl aes_decrypt
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
bne .Lcbcdecloop
|
|
bne .Lcbcdecloop
|
|
.Lcbcdecout:
|
|
.Lcbcdecout:
|
|
@@ -300,15 +300,15 @@ ENTRY(ce_aes_ctr_encrypt)
|
|
rev ip, r6
|
|
rev ip, r6
|
|
add r6, r6, #1
|
|
add r6, r6, #1
|
|
vmov s11, ip
|
|
vmov s11, ip
|
|
- vld1.8 {q3-q4}, [r1, :64]!
|
|
|
|
- vld1.8 {q5}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q3-q4}, [r1]!
|
|
|
|
+ vld1.8 {q5}, [r1]!
|
|
bl aes_encrypt_3x
|
|
bl aes_encrypt_3x
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
veor q1, q1, q4
|
|
veor q1, q1, q4
|
|
veor q2, q2, q5
|
|
veor q2, q2, q5
|
|
rev ip, r6
|
|
rev ip, r6
|
|
- vst1.8 {q0-q1}, [r0, :64]!
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]!
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
vmov s27, ip
|
|
vmov s27, ip
|
|
b .Lctrloop3x
|
|
b .Lctrloop3x
|
|
.Lctr1x:
|
|
.Lctr1x:
|
|
@@ -318,10 +318,10 @@ ENTRY(ce_aes_ctr_encrypt)
|
|
vmov q0, q6
|
|
vmov q0, q6
|
|
bl aes_encrypt
|
|
bl aes_encrypt
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
- bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
|
|
|
|
- vld1.8 {q3}, [r1, :64]!
|
|
|
|
|
|
+ bmi .Lctrtailblock @ blocks < 0 means tail block
|
|
|
|
+ vld1.8 {q3}, [r1]!
|
|
veor q3, q0, q3
|
|
veor q3, q0, q3
|
|
- vst1.8 {q3}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q3}, [r0]!
|
|
|
|
|
|
adds r6, r6, #1 @ increment BE ctr
|
|
adds r6, r6, #1 @ increment BE ctr
|
|
rev ip, r6
|
|
rev ip, r6
|
|
@@ -333,10 +333,8 @@ ENTRY(ce_aes_ctr_encrypt)
|
|
vst1.8 {q6}, [r5]
|
|
vst1.8 {q6}, [r5]
|
|
pop {r4-r6, pc}
|
|
pop {r4-r6, pc}
|
|
|
|
|
|
-.Lctrhalfblock:
|
|
|
|
- vld1.8 {d1}, [r1, :64]
|
|
|
|
- veor d0, d0, d1
|
|
|
|
- vst1.8 {d0}, [r0, :64]
|
|
|
|
|
|
+.Lctrtailblock:
|
|
|
|
+ vst1.8 {q0}, [r0, :64] @ return just the key stream
|
|
pop {r4-r6, pc}
|
|
pop {r4-r6, pc}
|
|
|
|
|
|
.Lctrcarry:
|
|
.Lctrcarry:
|
|
@@ -405,8 +403,8 @@ ENTRY(ce_aes_xts_encrypt)
|
|
.Lxtsenc3x:
|
|
.Lxtsenc3x:
|
|
subs r4, r4, #3
|
|
subs r4, r4, #3
|
|
bmi .Lxtsenc1x
|
|
bmi .Lxtsenc1x
|
|
- vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
|
|
|
|
- vld1.8 {q2}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0-q1}, [r1]! @ get 3 pt blocks
|
|
|
|
+ vld1.8 {q2}, [r1]!
|
|
next_tweak q4, q3, q7, q6
|
|
next_tweak q4, q3, q7, q6
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
next_tweak q5, q4, q7, q6
|
|
next_tweak q5, q4, q7, q6
|
|
@@ -416,8 +414,8 @@ ENTRY(ce_aes_xts_encrypt)
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
veor q1, q1, q4
|
|
veor q1, q1, q4
|
|
veor q2, q2, q5
|
|
veor q2, q2, q5
|
|
- vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]! @ write 3 ct blocks
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
vmov q3, q5
|
|
vmov q3, q5
|
|
teq r4, #0
|
|
teq r4, #0
|
|
beq .Lxtsencout
|
|
beq .Lxtsencout
|
|
@@ -426,11 +424,11 @@ ENTRY(ce_aes_xts_encrypt)
|
|
adds r4, r4, #3
|
|
adds r4, r4, #3
|
|
beq .Lxtsencout
|
|
beq .Lxtsencout
|
|
.Lxtsencloop:
|
|
.Lxtsencloop:
|
|
- vld1.8 {q0}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0}, [r1]!
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
bl aes_encrypt
|
|
bl aes_encrypt
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
beq .Lxtsencout
|
|
beq .Lxtsencout
|
|
next_tweak q3, q3, q7, q6
|
|
next_tweak q3, q3, q7, q6
|
|
@@ -456,8 +454,8 @@ ENTRY(ce_aes_xts_decrypt)
|
|
.Lxtsdec3x:
|
|
.Lxtsdec3x:
|
|
subs r4, r4, #3
|
|
subs r4, r4, #3
|
|
bmi .Lxtsdec1x
|
|
bmi .Lxtsdec1x
|
|
- vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
|
|
|
|
- vld1.8 {q2}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0-q1}, [r1]! @ get 3 ct blocks
|
|
|
|
+ vld1.8 {q2}, [r1]!
|
|
next_tweak q4, q3, q7, q6
|
|
next_tweak q4, q3, q7, q6
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
next_tweak q5, q4, q7, q6
|
|
next_tweak q5, q4, q7, q6
|
|
@@ -467,8 +465,8 @@ ENTRY(ce_aes_xts_decrypt)
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
veor q1, q1, q4
|
|
veor q1, q1, q4
|
|
veor q2, q2, q5
|
|
veor q2, q2, q5
|
|
- vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
|
|
|
|
- vst1.8 {q2}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0-q1}, [r0]! @ write 3 pt blocks
|
|
|
|
+ vst1.8 {q2}, [r0]!
|
|
vmov q3, q5
|
|
vmov q3, q5
|
|
teq r4, #0
|
|
teq r4, #0
|
|
beq .Lxtsdecout
|
|
beq .Lxtsdecout
|
|
@@ -477,12 +475,12 @@ ENTRY(ce_aes_xts_decrypt)
|
|
adds r4, r4, #3
|
|
adds r4, r4, #3
|
|
beq .Lxtsdecout
|
|
beq .Lxtsdecout
|
|
.Lxtsdecloop:
|
|
.Lxtsdecloop:
|
|
- vld1.8 {q0}, [r1, :64]!
|
|
|
|
|
|
+ vld1.8 {q0}, [r1]!
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
add ip, r2, #32 @ 3rd round key
|
|
add ip, r2, #32 @ 3rd round key
|
|
bl aes_decrypt
|
|
bl aes_decrypt
|
|
veor q0, q0, q3
|
|
veor q0, q0, q3
|
|
- vst1.8 {q0}, [r0, :64]!
|
|
|
|
|
|
+ vst1.8 {q0}, [r0]!
|
|
subs r4, r4, #1
|
|
subs r4, r4, #1
|
|
beq .Lxtsdecout
|
|
beq .Lxtsdecout
|
|
next_tweak q3, q3, q7, q6
|
|
next_tweak q3, q3, q7, q6
|