vecemu.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Routines to emulate some Altivec/VMX instructions, specifically
 * those that can trap when given denormalized operands in Java mode.
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
#include <linux/uaccess.h>

/* Functions in vector.S */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vrefp(vector128 *dst, vector128 *src);
extern void vrsqrtefp(vector128 *dst, vector128 *src);
extern void vexptep(vector128 *dst, vector128 *src);
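
/*
 * Table of 2^(i/8) for i = 0..7 as 1.23 fixed-point values (scaled by
 * 2^23): 0x800000 is 1.0 and 0xb504f3 is 2^0.5, matching the constants
 * used for range reduction in elog2() below.
 */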
static unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
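/*
 * For example, eexp2(0x40000000) (x = 2.0f) converts x to 2.0 in 9.23
 * fixed point, looks up exp2s[0] with a zero interpolation fraction and
 * returns 0x40800000, i.e. 4.0f.
 */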
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000)? 0: 0x7f800000;	/* 0 or +Inf */
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */
	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;
	/* extract integer part, which becomes exponent part of result */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;
	if (exp < -23)
		return 0;
	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];
	/* linear interpolation using remaining 20 bits of fraction */
	asm("mulhwu %0,%1,%2" : "=r" (frac)
	    : "r" (pwr << 12), "r" (0x172b83ff));
	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
	mant += frac;
	if (exp >= 0)
		return mant + (exp << 23);
	/* denormalized result */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x.
 */
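/*
 * For example, elog2(0x40800000) (x = 4.0f) has a mantissa of exactly
 * 1.0, so none of the reduction steps fire and the result is just the
 * unbiased exponent (2) renormalized to 0x40000000, i.e. 2.0f.
 */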
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */

	if (exp == 0) {
		/* denormalized */
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
		mant <<= lz - 8;
		exp = (-118 - lz) << 23;
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	if (mant >= 0xb504f3) {		/* 2^0.5 * 2^23 */
		exp |= 0x400000;	/* 0.5 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xb504f334));	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {		/* 2^0.25 * 2^23 */
		exp |= 0x200000;	/* 0.25 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xd744fccb));	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {		/* 2^0.125 * 2^23 */
		exp |= 0x100000;	/* 0.125 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xeac0c6e8));	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {		/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		asm("mulhwu %0,%1,%2" : "=r" (frac)
		    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
		exp += frac;
	}
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
		lz = 8 - lz;
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}

#define VSCR_SAT	1
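
/*
 * ctsxs() implements the per-element conversion for vctsxs: convert a
 * single-precision value to a signed 32-bit fixed-point integer with
 * `scale' fraction bits, rounding towards zero, saturating on overflow
 * and recording saturation in *vscrp.  For example,
 * ctsxs(0x40a00000 (5.0f), 1, &vscr) returns 10, i.e. 5.0 * 2^1.
 */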
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp, mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (exp >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return (x & 0x80000000)? 0x80000000: 0x7fffffff;
	}
	mant |= 0x800000;
	mant = (mant << 7) >> (30 - exp);
	return (x & 0x80000000)? -mant: mant;
}
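
/*
 * ctuxs() is the unsigned counterpart, used for vctuxs: negative inputs
 * saturate to 0 and values of 2^32 or more saturate to 0xffffffff, with
 * VSCR[SAT] set in both cases.
 */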
static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
{
	int exp;
	unsigned int mant;

	exp = (x >> 23) & 0xff;
	mant = x & 0x7fffff;
	if (exp == 255 && mant != 0)
		return 0;		/* NaN -> 0 */
	exp = exp - 127 + scale;
	if (exp < 0)
		return 0;		/* round towards zero */
	if (x & 0x80000000) {
		/* negative => saturate to 0 */
		*vscrp |= VSCR_SAT;
		return 0;
	}
	if (exp >= 32) {
		/* saturate */
		*vscrp |= VSCR_SAT;
		return 0xffffffff;
	}
	mant |= 0x800000;
	mant = (mant << 8) >> (31 - exp);
	return mant;
}

/* Round to floating integer, towards 0 */
static unsigned int rfiz(unsigned int x)
{
	int exp;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to 0 */
	return x & ~(0x7fffff >> exp);
}

/* Round to floating integer, towards +/- Inf */
static unsigned int rfii(unsigned int x)
{
	int exp, mask;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 -> +/-0 */
	if (exp < 0)
		/* 0 < |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	mask = 0x7fffff >> exp;
	/* mantissa overflows into exponent - that's OK,
	   it can't overflow into the sign bit */
	return (x + mask) & ~mask;
}

/* Round to floating integer, to nearest */
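/*
 * Adds 0.5 to the magnitude and truncates, so halfway cases round away
 * from zero; e.g. rfin(0x3fc00000), i.e. 1.5f, returns 0x40000000 (2.0f).
 */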
static unsigned int rfin(unsigned int x)
{
	int exp, half;

	exp = ((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return x & 0x80000000;	/* |x| < 0.5 -> +/-0 */
	if (exp == -1)
		/* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
		return (x & 0x80000000) | 0x3f800000;
	half = 0x400000 >> exp;
	/* add 0.5 to the magnitude and chop off the fraction bits */
	return (x + half) & ~(0x7fffff >> exp);
}
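
/*
 * Emulate the Altivec instruction that trapped at regs->nip.  The
 * instruction word is fetched from user memory, checked for primary
 * opcode 4 and decoded into its VD/VA/VB/VC fields; only the handful
 * of vector floating-point operations listed in the switch below are
 * emulated.  Returns 0 on success, -EFAULT if the instruction cannot
 * be read, or -EINVAL if it is not an emulated Altivec instruction.
 */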
int emulate_altivec(struct pt_regs *regs)
{
	unsigned int instr, i;
	unsigned int va, vb, vc, vd;
	vector128 *vrs;

	if (get_user(instr, (unsigned int __user *) regs->nip))
		return -EFAULT;
	if ((instr >> 26) != 4)
		return -EINVAL;		/* not an altivec instruction */
	vd = (instr >> 21) & 0x1f;
	va = (instr >> 16) & 0x1f;
	vb = (instr >> 11) & 0x1f;
	vc = (instr >> 6) & 0x1f;

	vrs = current->thread.vr_state.vr;
	switch (instr & 0x3f) {
	case 10:
		switch (vc) {
		case 0:		/* vaddfp */
			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 1:		/* vsubfp */
			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 4:		/* vrefp */
			vrefp(&vrs[vd], &vrs[vb]);
			break;
		case 5:		/* vrsqrtefp */
			vrsqrtefp(&vrs[vd], &vrs[vb]);
			break;
		case 6:		/* vexptefp */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
			break;
		case 7:		/* vlogefp */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
			break;
		case 8:		/* vrfin */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
			break;
		case 9:		/* vrfiz */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
			break;
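		/*
		 * Rounding towards +Inf truncates negative values and
		 * rounds positive values up, and vice versa for -Inf,
		 * hence the sign-dependent choice of rfiz()/rfii() below.
		 */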
		case 10:	/* vrfip */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfiz(x): rfii(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 11:	/* vrfim */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfii(x): rfiz(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 14:	/* vctuxs */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
						&current->thread.vr_state.vscr.u[3]);
			break;
		case 15:	/* vctsxs */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
						&current->thread.vr_state.vscr.u[3]);
			break;
		default:
			return -EINVAL;
		}
		break;
	case 46:	/* vmaddfp */
		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	case 47:	/* vnmsubfp */
		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}