|
@@ -75,3 +75,151 @@
|
|
|
ldr w\tmpnr, [\state, #16 * 2 + 4]
|
|
|
fpsimd_restore_fpcr x\tmpnr, \state
|
|
|
.endm
|
|
|
+
|
|
|
+/* Sanity-check macros to help avoid encoding garbage instructions */
|
|
|
+/*
+ * Raise an assembly-time error unless nr lies in the range 0..30.
+ *   nr: register number expression to validate.
+ */
|
|
|
+.macro _check_general_reg nr
|
|
|
+ .if (\nr) < 0 || (\nr) > 30
|
|
|
+ .error "Bad register number \nr."
|
|
|
+ .endif
|
|
|
+.endm
|
|
|
+/*
+ * Raise an assembly-time error unless znr lies in the range 0..31.
+ *   znr: SVE vector register number expression to validate.
+ */
|
|
|
+.macro _sve_check_zreg znr
|
|
|
+ .if (\znr) < 0 || (\znr) > 31
|
|
|
+ .error "Bad Scalable Vector Extension vector register number \znr."
|
|
|
+ .endif
|
|
|
+.endm
|
|
|
+/*
+ * Raise an assembly-time error unless pnr lies in the range 0..15.
+ *   pnr: SVE predicate register number expression to validate.
+ */
|
|
|
+.macro _sve_check_preg pnr
|
|
|
+ .if (\pnr) < 0 || (\pnr) > 15
|
|
|
+ .error "Bad Scalable Vector Extension predicate register number \pnr."
|
|
|
+ .endif
|
|
|
+.endm
|
|
|
+/*
+ * Raise an assembly-time error unless min <= n <= max.
+ *   n:   expression to validate.
+ *   min: lowest accepted value (inclusive).
+ *   max: highest accepted value (inclusive).
+ */
|
|
|
+.macro _check_num n, min, max
|
|
|
+ .if (\n) < (\min) || (\n) > (\max)
|
|
|
+ .error "Number \n out of range [\min,\max]"
|
|
|
+ .endif
|
|
|
+.endm
|
|
|
+
|
|
|
+/* SVE instruction encodings for non-SVE-capable assemblers */
|
|
|
+
|
|
|
+/*
+ * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
+ *
+ * Emitted as a raw .inst word built by ORing fields into 0xe5804000.
+ *   nz:     vector register number, checked to be 0..31; ORed in
+ *           unshifted.
+ *   nxbase: base register number, checked to be 0..30; shifted left by 5.
+ *   offset: optional, defaults to 0; checked to be in [-0x100, 0xff].
+ *           Its low three bits are shifted left by 10 and the bits
+ *           selected by the mask 0x1f8 are shifted left by 13.
+ */
|
|
|
+.macro _sve_str_v nz, nxbase, offset=0
|
|
|
+ _sve_check_zreg \nz
|
|
|
+ _check_general_reg \nxbase
|
|
|
+ _check_num (\offset), -0x100, 0xff
|
|
|
+ .inst 0xe5804000 \
|
|
|
+ | (\nz) \
|
|
|
+ | ((\nxbase) << 5) \
|
|
|
+ | (((\offset) & 7) << 10) \
|
|
|
+ | (((\offset) & 0x1f8) << 13)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
+ *
+ * Emitted as a raw .inst word built by ORing fields into 0x85804000.
+ *   nz:     vector register number, checked to be 0..31; ORed in
+ *           unshifted.
+ *   nxbase: base register number, checked to be 0..30; shifted left by 5.
+ *   offset: optional, defaults to 0; checked to be in [-0x100, 0xff].
+ *           Its low three bits are shifted left by 10 and the bits
+ *           selected by the mask 0x1f8 are shifted left by 13.
+ */
|
|
|
+.macro _sve_ldr_v nz, nxbase, offset=0
|
|
|
+ _sve_check_zreg \nz
|
|
|
+ _check_general_reg \nxbase
|
|
|
+ _check_num (\offset), -0x100, 0xff
|
|
|
+ .inst 0x85804000 \
|
|
|
+ | (\nz) \
|
|
|
+ | ((\nxbase) << 5) \
|
|
|
+ | (((\offset) & 7) << 10) \
|
|
|
+ | (((\offset) & 0x1f8) << 13)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
+ *
+ * Emitted as a raw .inst word built by ORing fields into 0xe5800000.
+ *   np:     predicate register number, checked to be 0..15; ORed in
+ *           unshifted.
+ *   nxbase: base register number, checked to be 0..30; shifted left by 5.
+ *   offset: optional, defaults to 0; checked to be in [-0x100, 0xff].
+ *           Its low three bits are shifted left by 10 and the bits
+ *           selected by the mask 0x1f8 are shifted left by 13.
+ */
|
|
|
+.macro _sve_str_p np, nxbase, offset=0
|
|
|
+ _sve_check_preg \np
|
|
|
+ _check_general_reg \nxbase
|
|
|
+ _check_num (\offset), -0x100, 0xff
|
|
|
+ .inst 0xe5800000 \
|
|
|
+ | (\np) \
|
|
|
+ | ((\nxbase) << 5) \
|
|
|
+ | (((\offset) & 7) << 10) \
|
|
|
+ | (((\offset) & 0x1f8) << 13)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
+ *
+ * Emitted as a raw .inst word built by ORing fields into 0x85800000.
+ *   np:     predicate register number, checked to be 0..15; ORed in
+ *           unshifted.
+ *   nxbase: base register number, checked to be 0..30; shifted left by 5.
+ *   offset: optional, defaults to 0; checked to be in [-0x100, 0xff].
+ *           Its low three bits are shifted left by 10 and the bits
+ *           selected by the mask 0x1f8 are shifted left by 13.
+ */
|
|
|
+.macro _sve_ldr_p np, nxbase, offset=0
|
|
|
+ _sve_check_preg \np
|
|
|
+ _check_general_reg \nxbase
|
|
|
+ _check_num (\offset), -0x100, 0xff
|
|
|
+ .inst 0x85800000 \
|
|
|
+ | (\np) \
|
|
|
+ | ((\nxbase) << 5) \
|
|
|
+ | (((\offset) & 7) << 10) \
|
|
|
+ | (((\offset) & 0x1f8) << 13)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * RDVL X\nx, #\imm
+ *
+ * Emitted as a raw .inst word built by ORing fields into 0x04bf5000.
+ *   nx:  number of the X register operand, checked to be 0..30; ORed in
+ *        unshifted.
+ *   imm: immediate, checked to be in [-0x20, 0x1f]; masked with 0x3f and
+ *        shifted left by 5.
+ */
|
|
|
+.macro _sve_rdvl nx, imm
|
|
|
+ _check_general_reg \nx
|
|
|
+ _check_num (\imm), -0x20, 0x1f
|
|
|
+ .inst 0x04bf5000 \
|
|
|
+ | (\nx) \
|
|
|
+ | (((\imm) & 0x3f) << 5)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * RDFFR (unpredicated): RDFFR P\np.B
+ *
+ * Emitted as a raw .inst word: 0x2519f000 with np ORed in unshifted.
+ *   np: predicate register number, checked to be 0..15.
+ */
|
|
|
+.macro _sve_rdffr np
|
|
|
+ _sve_check_preg \np
|
|
|
+ .inst 0x2519f000 \
|
|
|
+ | (\np)
|
|
|
+.endm
|
|
|
+
|
|
|
+/*
+ * WRFFR P\np.B
+ *
+ * Emitted as a raw .inst word: 0x25289000 with np shifted left by 5 and
+ * ORed in.
+ *   np: predicate register number, checked to be 0..15.
+ */
|
|
|
+.macro _sve_wrffr np
|
|
|
+ _sve_check_preg \np
|
|
|
+ .inst 0x25289000 \
|
|
|
+ | ((\np) << 5)
|
|
|
+.endm
|
|
|
+/*
+ * Recursive helper for _for: invoke _for__body once for every value in
+ * the inclusive range from..to.
+ *
+ * A single-value range expands the body directly; any wider range is cut
+ * at its midpoint and each half is handled by a nested invocation.  The
+ * recursion only ends when both bounds are equal, so from must not be
+ * greater than to.
+ */
|
|
|
+.macro __for from:req, to:req
|
|
|
+ .if (\from) == (\to)
|
|
|
+ _for__body \from
|
|
|
+ .else
|
|
|
+ __for \from, (\from) + ((\to) - (\from)) / 2
|
|
|
+ __for (\from) + ((\to) - (\from)) / 2 + 1, \to
|
|
|
+ .endif
|
|
|
+.endm
|
|
|
+/*
+ * Expand insn once for each value in the inclusive range from..to.
+ *   var:  name under which insn refers to the current value (written
+ *         with a leading backslash inside insn).
+ *   from: first value of the range.
+ *   to:   last value of the range.
+ *   insn: text to expand; it becomes the body of a temporary macro named
+ *         _for__body, which is purged again once the range is done.
+ *
+ * NOTE(review): the temporary macro name is fixed, so nesting one _for
+ * inside another presumably does not work -- confirm before relying on it.
+ */
|
|
|
+.macro _for var:req, from:req, to:req, insn:vararg
|
|
|
+ .macro _for__body \var:req
|
|
|
+ \insn
|
|
|
+ .endm
|
|
|
+
|
|
|
+ __for \from, \to
|
|
|
+
|
|
|
+ .purgem _for__body
|
|
|
+.endm
|
|
|
+/*
+ * Store the SVE register state, then FPSR and FPCR.
+ *   nxbase: number of the X register holding the base address; handed to
+ *           the _sve_str_v, _sve_str_p and _sve_ldr_p encoders.
+ *   xpfpsr: register operand used as the address at which FPSR is stored;
+ *           FPCR is stored 4 bytes above it.
+ *   nxtmp:  number of a scratch register, written through both its x and
+ *           w views.
+ *
+ * Offsets relative to nxbase, in the MUL VL units of each instruction:
+ *   Z0-Z31 at -34..-3, P0-P15 at -16..-1, FFR at 0.
+ * P0 is overwritten while FFR is being saved and is reloaded afterwards.
+ */
|
|
|
+.macro sve_save nxbase, xpfpsr, nxtmp
|
|
|
+ _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
|
|
|
+ _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
|
|
|
+ _sve_rdffr 0 // read FFR into P0, which is already saved
|
|
|
+ _sve_str_p 0, \nxbase // FFR value goes to offset 0
|
|
|
+ _sve_ldr_p 0, \nxbase, -16 // reload P0 from the slot it was stored to
|
|
|
+
|
|
|
+ mrs x\nxtmp, fpsr
|
|
|
+ str w\nxtmp, [\xpfpsr]
|
|
|
+ mrs x\nxtmp, fpcr
|
|
|
+ str w\nxtmp, [\xpfpsr, #4]
|
|
|
+.endm
|
|
|
+/*
+ * Rewrite the length field of ZCR_EL1, then load the SVE register state,
+ * FPSR and FPCR.
+ *   nxbase:    number of the X register holding the base address; handed
+ *              to the _sve_ldr_v and _sve_ldr_p encoders.
+ *   xpfpsr:    register operand used as the address from which FPSR is
+ *              loaded; FPCR is loaded from 4 bytes above it.
+ *   xvqminus1: register operand ORed into the ZCR_EL1 value after the
+ *              ZCR_ELx_LEN_MASK bits have been cleared (presumably the
+ *              vector length in quadwords minus one -- confirm with
+ *              callers).
+ *   nxtmp:     number of a scratch register, written through both its x
+ *              and w views.
+ *
+ * Offsets relative to nxbase match sve_save: Z0-Z31 at -34..-3, FFR at 0,
+ * P0-P15 at -16..-1.  P0 is used to stage the FFR value, so P0-P15 are
+ * loaded only after FFR has been written.
+ *
+ * mrs_s, msr_s, SYS_ZCR_EL1 and ZCR_ELx_LEN_MASK are not defined in this
+ * section and must be provided elsewhere.
+ */
|
|
|
+.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp
|
|
|
+ mrs_s x\nxtmp, SYS_ZCR_EL1
|
|
|
+ bic x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK
|
|
|
+ orr x\nxtmp, x\nxtmp, \xvqminus1
|
|
|
+ msr_s SYS_ZCR_EL1, x\nxtmp // self-synchronising
|
|
|
+
|
|
|
+ _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
|
|
|
+ _sve_ldr_p 0, \nxbase // fetch the saved FFR value into P0
|
|
|
+ _sve_wrffr 0 // and move it from P0 into FFR
|
|
|
+ _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16
|
|
|
+
|
|
|
+ ldr w\nxtmp, [\xpfpsr]
|
|
|
+ msr fpsr, x\nxtmp
|
|
|
+ ldr w\nxtmp, [\xpfpsr, #4]
|
|
|
+ msr fpcr, x\nxtmp
|
|
|
+.endm
|