fpu_asm.S 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. /*
  2. * Copyright 2015, Cyril Bur, IBM Corp.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. */
  9. #include "../basic_asm.h"
  10. #define PUSH_FPU(pos) \
  11. stfd f14,pos(sp); \
  12. stfd f15,pos+8(sp); \
  13. stfd f16,pos+16(sp); \
  14. stfd f17,pos+24(sp); \
  15. stfd f18,pos+32(sp); \
  16. stfd f19,pos+40(sp); \
  17. stfd f20,pos+48(sp); \
  18. stfd f21,pos+56(sp); \
  19. stfd f22,pos+64(sp); \
  20. stfd f23,pos+72(sp); \
  21. stfd f24,pos+80(sp); \
  22. stfd f25,pos+88(sp); \
  23. stfd f26,pos+96(sp); \
  24. stfd f27,pos+104(sp); \
  25. stfd f28,pos+112(sp); \
  26. stfd f29,pos+120(sp); \
  27. stfd f30,pos+128(sp); \
  28. stfd f31,pos+136(sp);
  29. #define POP_FPU(pos) \
  30. lfd f14,pos(sp); \
  31. lfd f15,pos+8(sp); \
  32. lfd f16,pos+16(sp); \
  33. lfd f17,pos+24(sp); \
  34. lfd f18,pos+32(sp); \
  35. lfd f19,pos+40(sp); \
  36. lfd f20,pos+48(sp); \
  37. lfd f21,pos+56(sp); \
  38. lfd f22,pos+64(sp); \
  39. lfd f23,pos+72(sp); \
  40. lfd f24,pos+80(sp); \
  41. lfd f25,pos+88(sp); \
  42. lfd f26,pos+96(sp); \
  43. lfd f27,pos+104(sp); \
  44. lfd f28,pos+112(sp); \
  45. lfd f29,pos+120(sp); \
  46. lfd f30,pos+128(sp); \
  47. lfd f31,pos+136(sp);
  48. # Careful calling this, it will 'clobber' fpu (by design)
  49. # Don't call this from C
  50. FUNC_START(load_fpu)
  51. lfd f14,0(r3)
  52. lfd f15,8(r3)
  53. lfd f16,16(r3)
  54. lfd f17,24(r3)
  55. lfd f18,32(r3)
  56. lfd f19,40(r3)
  57. lfd f20,48(r3)
  58. lfd f21,56(r3)
  59. lfd f22,64(r3)
  60. lfd f23,72(r3)
  61. lfd f24,80(r3)
  62. lfd f25,88(r3)
  63. lfd f26,96(r3)
  64. lfd f27,104(r3)
  65. lfd f28,112(r3)
  66. lfd f29,120(r3)
  67. lfd f30,128(r3)
  68. lfd f31,136(r3)
  69. blr
  70. FUNC_END(load_fpu)
  71. FUNC_START(check_fpu)
  72. mr r4,r3
  73. li r3,1 # assume a bad result
  74. lfd f0,0(r4)
  75. fcmpu cr1,f0,f14
  76. bne cr1,1f
  77. lfd f0,8(r4)
  78. fcmpu cr1,f0,f15
  79. bne cr1,1f
  80. lfd f0,16(r4)
  81. fcmpu cr1,f0,f16
  82. bne cr1,1f
  83. lfd f0,24(r4)
  84. fcmpu cr1,f0,f17
  85. bne cr1,1f
  86. lfd f0,32(r4)
  87. fcmpu cr1,f0,f18
  88. bne cr1,1f
  89. lfd f0,40(r4)
  90. fcmpu cr1,f0,f19
  91. bne cr1,1f
  92. lfd f0,48(r4)
  93. fcmpu cr1,f0,f20
  94. bne cr1,1f
  95. lfd f0,56(r4)
  96. fcmpu cr1,f0,f21
  97. bne cr1,1f
  98. lfd f0,64(r4)
  99. fcmpu cr1,f0,f22
  100. bne cr1,1f
  101. lfd f0,72(r4)
  102. fcmpu cr1,f0,f23
  103. bne cr1,1f
  104. lfd f0,80(r4)
  105. fcmpu cr1,f0,f24
  106. bne cr1,1f
  107. lfd f0,88(r4)
  108. fcmpu cr1,f0,f25
  109. bne cr1,1f
  110. lfd f0,96(r4)
  111. fcmpu cr1,f0,f26
  112. bne cr1,1f
  113. lfd f0,104(r4)
  114. fcmpu cr1,f0,f27
  115. bne cr1,1f
  116. lfd f0,112(r4)
  117. fcmpu cr1,f0,f28
  118. bne cr1,1f
  119. lfd f0,120(r4)
  120. fcmpu cr1,f0,f29
  121. bne cr1,1f
  122. lfd f0,128(r4)
  123. fcmpu cr1,f0,f30
  124. bne cr1,1f
  125. lfd f0,136(r4)
  126. fcmpu cr1,f0,f31
  127. bne cr1,1f
  128. li r3,0 # Success!!!
  129. 1: blr
  130. FUNC_START(test_fpu)
  131. # r3 holds pointer to where to put the result of fork
  132. # r4 holds pointer to the pid
  133. # f14-f31 are non volatiles
  134. PUSH_BASIC_STACK(256)
  135. std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
  136. std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
  137. PUSH_FPU(STACK_FRAME_LOCAL(2,0))
  138. bl load_fpu
  139. nop
  140. li r0,__NR_fork
  141. sc
  142. # pass the result of the fork to the caller
  143. ld r9,STACK_FRAME_PARAM(1)(sp)
  144. std r3,0(r9)
  145. ld r3,STACK_FRAME_PARAM(0)(sp)
  146. bl check_fpu
  147. nop
  148. POP_FPU(STACK_FRAME_LOCAL(2,0))
  149. POP_BASIC_STACK(256)
  150. blr
  151. FUNC_END(test_fpu)
  152. # int preempt_fpu(double *darray, int *threads_running, int *running)
  153. # On starting will (atomically) decrement not_ready as a signal that the FPU
  154. # has been loaded with darray. Will proceed to check the validity of the FPU
  155. # registers while running is not zero.
  156. FUNC_START(preempt_fpu)
  157. PUSH_BASIC_STACK(256)
  158. std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
  159. std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
  160. std r5,STACK_FRAME_PARAM(2)(sp) # int *running
  161. PUSH_FPU(STACK_FRAME_LOCAL(3,0))
  162. bl load_fpu
  163. nop
  164. sync
  165. # Atomic DEC
  166. ld r3,STACK_FRAME_PARAM(1)(sp)
  167. 1: lwarx r4,0,r3
  168. addi r4,r4,-1
  169. stwcx. r4,0,r3
  170. bne- 1b
  171. 2: ld r3,STACK_FRAME_PARAM(0)(sp)
  172. bl check_fpu
  173. nop
  174. cmpdi r3,0
  175. bne 3f
  176. ld r4,STACK_FRAME_PARAM(2)(sp)
  177. ld r5,0(r4)
  178. cmpwi r5,0
  179. bne 2b
  180. 3: POP_FPU(STACK_FRAME_LOCAL(3,0))
  181. POP_BASIC_STACK(256)
  182. blr
  183. FUNC_END(preempt_fpu)