sha1-ce-core.S 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  /*
   * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
   *
   * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License version 2 as
   * published by the Free Software Foundation.
   */
  10. #include <linux/linkage.h>
  11. #include <asm/assembler.h>
  .text
  .arch   armv8-a+crypto

  /*
   * Register aliases shared by the macros and by sha1_ce_transform.
   */

  /* round constants, one per register, replicated across all four lanes */
  k0      .req    v0
  k1      .req    v1
  k2      .req    v2
  k3      .req    v3

  /* scratch: schedule words + round constant, used alternately */
  t0      .req    v4
  t1      .req    v5

  /* digest state carried from block to block: 16 bytes in v6, one word in v7 */
  dga     .req    q6
  dgav    .req    v6
  dgb     .req    s7
  dgbv    .req    v7

  /* per-block working copy of the 16-byte part of the state */
  dg0q    .req    q12
  dg0s    .req    s12
  dg0v    .req    v12

  /* destinations of the sha1h instructions, used alternately */
  dg1s    .req    s13
  dg1v    .req    v13
  dg2s    .req    s14
  /*
   * add_only - emit one sha1h + sha1<op> pair and the add that prepares the
   *            input of the following invocation
   *
   *   op  - suffix of the hash instruction to emit: c, p or m
   *   ev  - 'ev' selects the even variant, anything else the odd variant
   *   rc  - round constant register to add (k0-k3); may be blank when s0 is
   *         blank in the odd variant
   *   s0  - number N of the schedule register vN to add to rc; in the odd
   *         variant it may be blank, in which case no add is emitted
   *   dg1 - even variant only: scalar register passed to sha1<op> in place of
   *         dg1s
   *
   * Even variant: writes t1 and dg2s, consumes t0 and dg1s (or \dg1).
   * Odd variant:  writes t0 and dg1s, consumes t1 and dg2s.
   * Both variants update dg0q in place, so invocations must alternate
   * even/odd and t0 must be valid before the first even invocation.
   */
  .macro add_only, op, ev, rc, s0, dg1
  .ifc \ev, ev
          add     t1.4s, v\s0\().4s, \rc\().4s
          sha1h   dg2s, dg0s
    .ifb \dg1
          sha1\op dg0q, dg1s, t0.4s
    .else
          sha1\op dg0q, \dg1, t0.4s
    .endif
  .else
    .ifnb \s0
          add     t0.4s, v\s0\().4s, \rc\().4s
    .endif
          sha1h   dg1s, dg0s
          sha1\op dg0q, dg2s, t1.4s
  .endif
  .endm
  /*
   * add_update - add_only wrapped in a message schedule update
   *
   *   op, ev, rc, dg1 - passed through unchanged to add_only
   *   s0              - number of the schedule register that is rewritten by
   *                     sha1su0/sha1su1
   *   s1, s2, s3      - numbers of the other three schedule registers; s1 is
   *                     also the register add_only adds the round constant to
   *
   * The add_only expansion sits between sha1su0 and sha1su1; it does not
   * touch v\s0, so the schedule update and the hash rounds are independent.
   */
  .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
          sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
          add_only \op, \ev, \rc, \s1, \dg1
          sha1su1 v\s0\().4s, v\s3\().4s
  .endm
  /*
   * The four SHA-1 round constants, stored as consecutive 32-bit words so
   * that each one can be loaded and replicated with a single ld1r.
   */
  .p2align 4
  .Lsha1_rcon:
          .word   0x5a827999              /* rounds  0-19 */
          .word   0x6ed9eba1              /* rounds 20-39 */
          .word   0x8f1bbcdc              /* rounds 40-59 */
          .word   0xca62c1d6              /* rounds 60-79 */
  /*
   * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
   *                        u8 *head, long bytes)
   *
   * Run the SHA-1 compression function over a sequence of 64-byte blocks.
   *
   *   w0 = blocks - number of 64-byte blocks to read from src
   *   x1 = src    - input data; advanced by 64 for every block read
   *   x2 = state  - five 32-bit digest words, read on entry and written back
   *                 before returning
   *   x3 = head   - if non-NULL, a 64-byte block that is processed before
   *                 anything is read from src (it is not counted in blocks)
   *   x4 = bytes  - if non-zero, the total message length in bytes; one extra
   *                 block holding the 0x80 marker and the 64-bit bit count is
   *                 then hashed after the input
   *
   * Clobbers x0, x1, x4, x6-x8 and v0-v14.
   *
   * At least one block must be supplied: with head == NULL and blocks == 0
   * a block is still read from src and the block counter wraps around.
   *
   * NOTE(review): v8-v14 are callee-saved under AAPCS64 and are not preserved
   * here; presumably every caller brackets the call with
   * kernel_neon_begin()/kernel_neon_end() - confirm.
   */
  ENTRY(sha1_ce_transform)
          /* load round constants, each replicated across all four lanes */
          adr     x6, .Lsha1_rcon
          ld1r    {k0.4s}, [x6], #4
          ld1r    {k1.4s}, [x6], #4
          ld1r    {k2.4s}, [x6], #4
          ld1r    {k3.4s}, [x6]

          /*
           * load state
           *
           * The state is an array of 32-bit words, so the first four are
           * loaded as four 32-bit elements rather than as one 128-bit
           * scalar: a 128-bit scalar load would place the words in the
           * wrong lanes on a big-endian kernel.
           */
          ld1     {dgav.4s}, [x2]
          ldr     dgb, [x2, #16]

          /* process the caller supplied head block first (if any) */
          cbz     x3, 0f
          ld1     {v8.4s-v11.4s}, [x3]
          b       1f

          /* load input */
  0:      ld1     {v8.4s-v11.4s}, [x1], #64
          sub     w0, w0, #1

          /* on little-endian, byte swap each 32-bit word of the block */
  1:
  CPU_LE( rev32   v8.16b, v8.16b          )
  CPU_LE( rev32   v9.16b, v9.16b          )
  CPU_LE( rev32   v10.16b, v10.16b        )
  CPU_LE( rev32   v11.16b, v11.16b        )

          /*
           * Hash the block held in v8-v11. The padding block built below
           * re-enters here, after the byte swap, because it is constructed
           * directly in register (word) order.
           */
  2:      add     t0.4s, v8.4s, k0.4s
          mov     dg0v.16b, dgav.16b

          /* 20 macro invocations, each covering four of the 80 rounds */
          add_update c, ev, k0,  8,  9, 10, 11, dgb
          add_update c, od, k0,  9, 10, 11,  8
          add_update c, ev, k0, 10, 11,  8,  9
          add_update c, od, k0, 11,  8,  9, 10
          add_update c, ev, k1,  8,  9, 10, 11

          add_update p, od, k1,  9, 10, 11,  8
          add_update p, ev, k1, 10, 11,  8,  9
          add_update p, od, k1, 11,  8,  9, 10
          add_update p, ev, k1,  8,  9, 10, 11
          add_update p, od, k2,  9, 10, 11,  8

          add_update m, ev, k2, 10, 11,  8,  9
          add_update m, od, k2, 11,  8,  9, 10
          add_update m, ev, k2,  8,  9, 10, 11
          add_update m, od, k2,  9, 10, 11,  8
          add_update m, ev, k3, 10, 11,  8,  9

          add_update p, od, k3, 11,  8,  9, 10
          add_only   p, ev, k3,  9
          add_only   p, od, k3, 10
          add_only   p, ev, k3, 11
          add_only   p, od

          /* update state */
          add     dgbv.2s, dgbv.2s, dg1v.2s
          add     dgav.4s, dgav.4s, dg0v.4s

          /* more input blocks to go? */
          cbnz    w0, 0b

          /*
           * Final block: add padding and total bit count.
           * Skip if we have no total byte count in x4. In that case, the input
           * size was not a round multiple of the block size, and the padding is
           * handled by the C code.
           */
          cbz     x4, 3f
          movi    v9.2d, #0
          mov     x8, #0x80000000         /* word 0: 0x80 marker byte */
          movi    v10.2d, #0
          ror     x7, x4, #29             // ror(lsl(x4, 3), 32)
          fmov    d8, x8                  /* also clears the top half of v8 */
          mov     x4, #0                  /* hash the padding block only once */
          mov     v11.d[0], xzr
          mov     v11.d[1], x7            /* words 14/15: bit count, high:low */
          b       2b

          /* store new state, using the same element layout as the load */
  3:      st1     {dgav.4s}, [x2]
          str     dgb, [x2, #16]
          ret
  ENDPROC(sha1_ce_transform)