fpsimdmacros.h 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. /*
  2. * FP/SIMD state saving and restoring macros
  3. *
  4. * Copyright (C) 2012 ARM Ltd.
  5. * Author: Catalin Marinas <catalin.marinas@arm.com>
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18. */
  /*
   * fpsimd_save - store Q0-Q31, FPSR and FPCR to memory.
   *
   *   state: X register holding the address of the save area
   *   tmpnr: number (not name) of a scratch general register; it is used
   *          both as x\tmpnr and as w\tmpnr
   *
   * Layout written, relative to the address initially held in \state:
   *   Q0-Q31 at byte offsets 0..511, FPSR (32 bits) at 512 and
   *   FPCR (32 bits) at 516.
   *
   * Clobbers \state (the last stp uses pre-index writeback, which leaves
   * it advanced by 16 * 30 bytes) and x\tmpnr.
   */
  .macro fpsimd_save state, tmpnr
          stp     q0, q1, [\state, #16 * 0]
          stp     q2, q3, [\state, #16 * 2]
          stp     q4, q5, [\state, #16 * 4]
          stp     q6, q7, [\state, #16 * 6]
          stp     q8, q9, [\state, #16 * 8]
          stp     q10, q11, [\state, #16 * 10]
          stp     q12, q13, [\state, #16 * 12]
          stp     q14, q15, [\state, #16 * 14]
          stp     q16, q17, [\state, #16 * 16]
          stp     q18, q19, [\state, #16 * 18]
          stp     q20, q21, [\state, #16 * 20]
          stp     q22, q23, [\state, #16 * 22]
          stp     q24, q25, [\state, #16 * 24]
          stp     q26, q27, [\state, #16 * 26]
          stp     q28, q29, [\state, #16 * 28]
          /* Writeback: the base register now points at the Q30 slot. */
          stp     q30, q31, [\state, #16 * 30]!

          /* 16 * 2 past the Q30 slot is the first byte after Q31. */
          mrs     x\tmpnr, fpsr
          str     w\tmpnr, [\state, #16 * 2]
          mrs     x\tmpnr, fpcr
          str     w\tmpnr, [\state, #16 * 2 + 4]
  .endm
  /*
   * fpsimd_restore_fpcr - make FPCR equal to the value in \state, skipping
   * the register write when FPCR already holds that value.
   *
   *   state: X register holding the wanted FPCR value
   *   tmp:   scratch X register; overwritten with the current FPCR
   *
   * Also alters the condition flags (cmp) and uses the numeric local
   * label 9999.
   */
  .macro fpsimd_restore_fpcr state, tmp
          mrs     \tmp, fpcr
          cmp     \tmp, \state
          /*
           * Writes to fpcr may be self-synchronising, so avoid restoring
           * the register if it hasn't changed.
           */
          b.eq    9999f
          msr     fpcr, \state
  9999:
  .endm
  /*
   * fpsimd_restore - reload Q0-Q31, FPSR and FPCR from memory.
   *
   *   state: X register holding the address of the save area
   *   tmpnr: number (not name) of a scratch general register; it is used
   *          both as x\tmpnr and as w\tmpnr
   *
   * Layout read, relative to the address initially held in \state:
   *   Q0-Q31 at byte offsets 0..511, FPSR (32 bits) at 512 and
   *   FPCR (32 bits) at 516.
   *
   * Clobbers \state: the last ldp advances it by 16 * 30 bytes through
   * pre-index writeback, and it is then handed to fpsimd_restore_fpcr as
   * that macro's scratch register.  Also clobbers x\tmpnr and the
   * condition flags.
   */
  .macro fpsimd_restore state, tmpnr
          ldp     q0, q1, [\state, #16 * 0]
          ldp     q2, q3, [\state, #16 * 2]
          ldp     q4, q5, [\state, #16 * 4]
          ldp     q6, q7, [\state, #16 * 6]
          ldp     q8, q9, [\state, #16 * 8]
          ldp     q10, q11, [\state, #16 * 10]
          ldp     q12, q13, [\state, #16 * 12]
          ldp     q14, q15, [\state, #16 * 14]
          ldp     q16, q17, [\state, #16 * 16]
          ldp     q18, q19, [\state, #16 * 18]
          ldp     q20, q21, [\state, #16 * 20]
          ldp     q22, q23, [\state, #16 * 22]
          ldp     q24, q25, [\state, #16 * 24]
          ldp     q26, q27, [\state, #16 * 26]
          ldp     q28, q29, [\state, #16 * 28]
          /* Writeback: the base register now points at the Q30 slot. */
          ldp     q30, q31, [\state, #16 * 30]!

          /* 16 * 2 past the Q30 slot is the first byte after Q31. */
          ldr     w\tmpnr, [\state, #16 * 2]
          msr     fpsr, x\tmpnr
          ldr     w\tmpnr, [\state, #16 * 2 + 4]
          /* The base address is no longer needed; reuse it as scratch. */
          fpsimd_restore_fpcr x\tmpnr, \state
  .endm
  75. /* Sanity-check macros to help avoid encoding garbage instructions */
  /*
   * _check_general_reg - stop assembly with an error unless \nr lies in
   * the range 0..30.
   */
  .macro _check_general_reg nr
          .if (\nr) > 30 || (\nr) < 0
                  .error "Bad register number \nr."
          .endif
  .endm
  /*
   * _sve_check_zreg - stop assembly with an error unless \znr lies in
   * the range 0..31.
   */
  .macro _sve_check_zreg znr
          .if (\znr) > 31 || (\znr) < 0
                  .error "Bad Scalable Vector Extension vector register number \znr."
          .endif
  .endm
  /*
   * _sve_check_preg - stop assembly with an error unless \pnr lies in
   * the range 0..15.
   */
  .macro _sve_check_preg pnr
          .if (\pnr) > 15 || (\pnr) < 0
                  .error "Bad Scalable Vector Extension predicate register number \pnr."
          .endif
  .endm
  /*
   * _check_num - stop assembly with an error unless \min <= \n <= \max.
   * All three arguments are assembly-time expressions.
   */
  .macro _check_num n, min, max
          .if (\n) > (\max) || (\n) < (\min)
                  .error "Number \n out of range [\min,\max]"
          .endif
  .endm
  96. /* SVE instruction encodings for non-SVE-capable assemblers */
  /*
   * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
   *
   *   nz:     vector register number, 0..31
   *   nxbase: base register number, 0..30
   *   offset: signed 9-bit immediate, -0x100..0xff (default 0)
   *
   * Fields ORed into the base opcode:
   *   [21:16] offset bits 8:3    [12:10] offset bits 2:0
   *   [9:5]   nxbase             [4:0]   nz
   */
  .macro _sve_str_v nz, nxbase, offset=0
          _sve_check_zreg \nz
          _check_general_reg \nxbase
          _check_num (\offset), -0x100, 0xff
          .inst   0xe5804000                      \
                  | (((\offset) & 0x1f8) << 13)   \
                  | (((\offset) & 7) << 10)       \
                  | ((\nxbase) << 5)              \
                  | (\nz)
  .endm
  /*
   * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
   *
   *   nz:     vector register number, 0..31
   *   nxbase: base register number, 0..30
   *   offset: signed 9-bit immediate, -0x100..0xff (default 0)
   *
   * Fields ORed into the base opcode:
   *   [21:16] offset bits 8:3    [12:10] offset bits 2:0
   *   [9:5]   nxbase             [4:0]   nz
   */
  .macro _sve_ldr_v nz, nxbase, offset=0
          _sve_check_zreg \nz
          _check_general_reg \nxbase
          _check_num (\offset), -0x100, 0xff
          .inst   0x85804000                      \
                  | (((\offset) & 0x1f8) << 13)   \
                  | (((\offset) & 7) << 10)       \
                  | ((\nxbase) << 5)              \
                  | (\nz)
  .endm
  /*
   * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
   *
   *   np:     predicate register number, 0..15
   *   nxbase: base register number, 0..30
   *   offset: signed 9-bit immediate, -0x100..0xff (default 0)
   *
   * Fields ORed into the base opcode:
   *   [21:16] offset bits 8:3    [12:10] offset bits 2:0
   *   [9:5]   nxbase             [3:0]   np
   */
  .macro _sve_str_p np, nxbase, offset=0
          _sve_check_preg \np
          _check_general_reg \nxbase
          _check_num (\offset), -0x100, 0xff
          .inst   0xe5800000                      \
                  | (((\offset) & 0x1f8) << 13)   \
                  | (((\offset) & 7) << 10)       \
                  | ((\nxbase) << 5)              \
                  | (\np)
  .endm
  /*
   * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
   *
   *   np:     predicate register number, 0..15
   *   nxbase: base register number, 0..30
   *   offset: signed 9-bit immediate, -0x100..0xff (default 0)
   *
   * Fields ORed into the base opcode:
   *   [21:16] offset bits 8:3    [12:10] offset bits 2:0
   *   [9:5]   nxbase             [3:0]   np
   */
  .macro _sve_ldr_p np, nxbase, offset=0
          _sve_check_preg \np
          _check_general_reg \nxbase
          _check_num (\offset), -0x100, 0xff
          .inst   0x85800000                      \
                  | (((\offset) & 0x1f8) << 13)   \
                  | (((\offset) & 7) << 10)       \
                  | ((\nxbase) << 5)              \
                  | (\np)
  .endm
  /*
   * RDVL X\nx, #\imm
   *
   *   nx:  destination register number, 0..30
   *   imm: signed 6-bit immediate, -0x20..0x1f
   *
   * Fields ORed into the base opcode:
   *   [10:5] imm (low six bits)    [4:0] nx
   */
  .macro _sve_rdvl nx, imm
          _check_general_reg \nx
          _check_num (\imm), -0x20, 0x1f
          .inst   0x04bf5000                      \
                  | (((\imm) & 0x3f) << 5)        \
                  | (\nx)
  .endm
  /*
   * RDFFR (unpredicated): RDFFR P\np.B
   *
   *   np: destination predicate register number, 0..15, placed in the
   *       low bits of the opcode
   */
  .macro _sve_rdffr np
          _sve_check_preg \np
          .inst   0x2519f000                      \
                  | (\np)
  .endm
  /*
   * WRFFR P\np.B
   *
   *   np: source predicate register number, 0..15, placed at bit 5 and
   *       up in the opcode
   */
  .macro _sve_wrffr np
          _sve_check_preg \np
          .inst   0x25289000                      \
                  | ((\np) << 5)
  .endm
  /*
   * __for - expand _for__body once for every integer in the inclusive
   * range \from..\to, in ascending order.
   *
   * The range is halved recursively (lower half first, then upper half)
   * until a single value is left, so the macro nesting depth grows with
   * the logarithm of the range size rather than with the size itself.
   *
   * _for__body must already be defined by the caller; see _for.
   *
   * NOTE(review): nothing here guards against \from > \to; callers are
   * expected to pass a non-empty range.
   */
  .macro __for from:req, to:req
          .if (\from) != (\to)
                  __for \from, (\from) + ((\to) - (\from)) / 2
                  __for (\from) + ((\to) - (\from)) / 2 + 1, \to
          .else
                  _for__body \from
          .endif
  .endm
  /*
   * _for - assembly-time loop: expand \insn once for each integer value
   * of \var in the inclusive range \from..\to.
   *
   *   var:  name of the loop variable; write it as \<name> inside \insn
   *   from: first value
   *   to:   last value
   *   insn: the rest of the line, used as the loop body
   *
   * A temporary macro _for__body, taking \var as its only parameter and
   * expanding to \insn, is defined for the duration of the loop and
   * purged afterwards.  Because that helper has a fixed name, _for
   * cannot be nested inside its own body.
   */
  .macro _for var:req, from:req, to:req, insn:vararg
          .macro _for__body \var:req
                  \insn
          .endm

          __for \from, \to

          .purgem _for__body
  .endm
  /*
   * sve_save - store Z0-Z31, P0-P15, FFR, FPSR and FPCR to memory.
   *
   *   nxbase: number (not name) of the X register holding the base
   *           address for the SVE register slots
   *   xpfpsr: X register (by name) pointing at two consecutive 32-bit
   *           words that receive FPSR and then FPCR
   *   nxtmp:  number (not name) of a scratch general register
   *
   * Slot offsets from the base, as passed to the "MUL VL" immediate of
   * the SVE load/store encodings:
   *   Z<n> at n - 34  (Z0 at -34 .. Z31 at -3)
   *   P<n> at n - 16  (P0 at -16 .. P15 at -1)
   *   FFR  at 0
   *
   * FFR is stored by way of P0: it is read into P0, P0 is stored at
   * offset 0, and P0 is then reloaded from its own slot at -16, which
   * the preceding loop has already filled.
   *
   * Clobbers x\nxtmp.
   */
  .macro sve_save nxbase, xpfpsr, nxtmp
          _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
          _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16

          /* FFR goes out through P0; P0 is put back afterwards. */
          _sve_rdffr 0
          _sve_str_p 0, \nxbase
          _sve_ldr_p 0, \nxbase, -16

          mrs     x\nxtmp, fpsr
          str     w\nxtmp, [\xpfpsr]
          mrs     x\nxtmp, fpcr
          str     w\nxtmp, [\xpfpsr, #4]
  .endm
  /*
   * sve_load - program the ZCR_EL1 LEN field, then reload Z0-Z31, FFR,
   * P0-P15, FPSR and FPCR from memory.
   *
   *   nxbase:    number (not name) of the X register holding the base
   *              address of the SVE register slots
   *   xpfpsr:    X register (by name) pointing at two consecutive
   *              32-bit words holding FPSR and then FPCR
   *   xvqminus1: X register (by name) whose value is ORed into ZCR_EL1
   *              once the LEN field has been cleared; presumably the
   *              vector length in quadwords minus one, going by the
   *              name -- confirm against the callers
   *   nxtmp:     number (not name) of a scratch general register
   *   xtmp2:     scratch X register (by name)
   *
   * Slot offsets from the base, as passed to the "MUL VL" immediate of
   * the SVE load/store encodings:
   *   Z<n> at n - 34  (Z0 at -34 .. Z31 at -3)
   *   P<n> at n - 16  (P0 at -16 .. P15 at -1)
   *   FFR  at 0
   *
   * ZCR_EL1 and FPCR are only written when the new value differs from
   * the current one.
   *
   * Clobbers x\nxtmp, \xtmp2 and the condition flags.  Uses the numeric
   * local label 921 and, through fpsimd_restore_fpcr, 9999.
   */
  .macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
          mrs_s   x\nxtmp, SYS_ZCR_EL1
          bic     \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK
          orr     \xtmp2, \xtmp2, \xvqminus1
          cmp     \xtmp2, x\nxtmp
          b.eq    921f
          msr_s   SYS_ZCR_EL1, \xtmp2     /* self-synchronising */
  921:
          _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34

          /* FFR comes in through P0, before P0 gets its own value. */
          _sve_ldr_p 0, \nxbase
          _sve_wrffr 0
          _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16

          ldr     w\nxtmp, [\xpfpsr]
          msr     fpsr, x\nxtmp
          ldr     w\nxtmp, [\xpfpsr, #4]
          /*
           * The second scratch register is free again after the ZCR_EL1
           * update, so use it to skip the write when FPCR is unchanged,
           * as fpsimd_restore does.
           */
          fpsimd_restore_fpcr x\nxtmp, \xtmp2
  .endm