vmx_asm.S 4.4 KB


  1. /*
  2. * Copyright 2015, Cyril Bur, IBM Corp.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. */
  9. #include "../basic_asm.h"
  /*
   * PUSH_VMX(pos, reg): store v20-v31 into twelve consecutive 16-byte slots
   * starting at sp + pos.
   *
   *   pos - byte offset from sp of the first slot; MUST be 16-byte aligned
   *   reg - scratch GPR used as the index operand of stvx; it is overwritten
   *         and holds pos + 176 afterwards.  It must not be r0, which the
   *         RA operand of addi/stvx treats as the constant zero.
   */
  #define __PUSH_VMX_NEXT(vreg,reg) \
          addi    reg,reg,16; \
          stvx    vreg,reg,sp;
  #define PUSH_VMX(pos,reg) \
          li      reg,pos; \
          stvx    v20,reg,sp; \
          __PUSH_VMX_NEXT(v21,reg) \
          __PUSH_VMX_NEXT(v22,reg) \
          __PUSH_VMX_NEXT(v23,reg) \
          __PUSH_VMX_NEXT(v24,reg) \
          __PUSH_VMX_NEXT(v25,reg) \
          __PUSH_VMX_NEXT(v26,reg) \
          __PUSH_VMX_NEXT(v27,reg) \
          __PUSH_VMX_NEXT(v28,reg) \
          __PUSH_VMX_NEXT(v29,reg) \
          __PUSH_VMX_NEXT(v30,reg) \
          __PUSH_VMX_NEXT(v31,reg)
  /*
   * POP_VMX(pos, reg): reload v20-v31 from the twelve consecutive 16-byte
   * slots starting at sp + pos (the layout written by PUSH_VMX with the same
   * pos).
   *
   *   pos - byte offset from sp of the first slot; MUST be 16-byte aligned
   *   reg - scratch GPR used as the index operand of lvx; it is overwritten
   *         and holds pos + 176 afterwards.  It must not be r0, which the
   *         RA operand of addi/lvx treats as the constant zero.
   */
  #define __POP_VMX_NEXT(vreg,reg) \
          addi    reg,reg,16; \
          lvx     vreg,reg,sp;
  #define POP_VMX(pos,reg) \
          li      reg,pos; \
          lvx     v20,reg,sp; \
          __POP_VMX_NEXT(v21,reg) \
          __POP_VMX_NEXT(v22,reg) \
          __POP_VMX_NEXT(v23,reg) \
          __POP_VMX_NEXT(v24,reg) \
          __POP_VMX_NEXT(v25,reg) \
          __POP_VMX_NEXT(v26,reg) \
          __POP_VMX_NEXT(v27,reg) \
          __POP_VMX_NEXT(v28,reg) \
          __POP_VMX_NEXT(v29,reg) \
          __POP_VMX_NEXT(v30,reg) \
          __POP_VMX_NEXT(v31,reg)
  /*
   * load_vmx: fill v20-v31 from twelve consecutive 16-byte values.
   *
   *   In:       r3 = address of the first value (lvx ignores the low four
   *                  bits of the effective address)
   *   Clobbers: r5 (left holding 176), v20-v31
   *
   * Careful: overwriting v20-v31 is the whole purpose of this routine and
   * nothing here saves or restores them, so do not call it from C.
   */
  FUNC_START(load_vmx)
          li      r5,0
          lvx     v20,r5,r3
          /* One addi/lvx pair per remaining register, in ascending order */
          .irp    vreg,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31
          addi    r5,r5,16
          lvx     \vreg,r5,r3
          .endr
          blr
  FUNC_END(load_vmx)
  /*
   * check_vmx: compare v20-v31 against twelve consecutive 16-byte values.
   *
   *   In:   r3 = address of the first expected value (lvx ignores the low
   *              four bits of the effective address)
   *   Out:  r3 = 0 when every bit of v20-v31 matches, 1 otherwise
   *
   * Should be safe from C.  Registers written here: r0, r3, r4, r5, v0, v1,
   * v2, cr0 (cmpdi) and cr6 (vcmpequd.), plus whatever PUSH_BASIC_STACK and
   * POP_BASIC_STACK touch (they are defined in basic_asm.h).
   */
  FUNC_START(check_vmx)
          PUSH_BASIC_STACK(32)
          mr      r4,r3
          li      r3,1                    /* assume a bad result */

          /*
           * v2 accumulates the AND of every comparison mask.  vcmpequd
           * compares the two doublewords of a vector independently, so a
           * lane of v2 stays all-ones only while that lane has matched in
           * every register checked so far.
           */
          li      r5,0
          lvx     v0,r5,r4
          vcmpequd. v1,v0,v20
          vmr     v2,v1
          .irp    vreg,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31
          addi    r5,r5,16
          lvx     v0,r5,r4
          vcmpequd. v1,v0,\vreg
          vand    v2,v2,v1
          .endr

          /*
           * Bounce the mask through the stack to reach the GPRs.  Both
           * doublewords have to be inspected: testing only one of them
           * would miss a mismatch confined to the other half of a vector.
           */
          li      r5,STACK_FRAME_LOCAL(0,0)
          stvx    v2,r5,sp
          ldx     r0,r5,sp                /* first doubleword of the mask */
          addi    r5,r5,8
          ldx     r4,r5,sp                /* second doubleword; r4 is free now */
          and     r0,r0,r4
          cmpdi   r0,-1                   /* all ones <=> everything matched */
          bne     1f
          li      r3,0
  1:      POP_BASIC_STACK(32)
          blr
  FUNC_END(check_vmx)
  /*
   * test_vmx: load known values into v20-v31, fork, then verify the
   * registers still hold those values.  Safe to call from C: v20-v31 are
   * non-volatile, so the caller's copies are saved on entry and restored on
   * exit.
   *
   *   In:   r3 = address of varray, the twelve 16-byte values to load/check
   *         r4 = address that receives the result of fork as a 32-bit pid
   *              (child pid in the parent, 0 in the child, -1 on failure)
   *   Out:  r3 = result of check_vmx: 0 when v20-v31 survived, 1 otherwise
   *
   * Both the parent and the child return from this function.
   */
  FUNC_START(test_vmx)
          PUSH_BASIC_STACK(512)
          std     r3,STACK_FRAME_PARAM(0)(sp)     /* address of varray */
          std     r4,STACK_FRAME_PARAM(1)(sp)     /* address of pid */
          PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)

          bl      load_vmx
          nop

          li      r0,__NR_fork
          sc
          /*
           * A raw sc signals failure by setting cr0.SO and leaving a
           * positive errno in r3.  Report -1 in that case so the errno is
           * not mistaken for a child pid.
           */
          bns     1f
          li      r3,-1

          /*
           * Pass the result of fork back to the caller.  A pid is 32 bits
           * wide, so store a word: a doubleword store would write past the
           * caller's object.
           */
  1:      ld      r9,STACK_FRAME_PARAM(1)(sp)
          stw     r3,0(r9)

          ld      r3,STACK_FRAME_PARAM(0)(sp)
          bl      check_vmx
          nop

          POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
          POP_BASIC_STACK(512)
          blr
  FUNC_END(test_vmx)
  /*
   * int preempt_vmx(vector int *varray, int *threads_starting, int *running)
   *
   * Loads v20-v31 from varray, then (atomically) decrements
   * *threads_starting as a signal that the VMX registers have been loaded.
   * After that it keeps checking v20-v31 against varray for as long as
   * *running is not zero.
   *
   *   In:   r3 = varray, r4 = threads_starting, r5 = running
   *   Out:  r3 = 0 when *running was seen as zero and no check failed,
   *              otherwise the non-zero result of the failing check_vmx
   *
   * The registers are checked at least once, even when *running is already
   * zero on entry.  The caller's v20-v31 are saved on entry and restored on
   * exit.
   */
  FUNC_START(preempt_vmx)
          PUSH_BASIC_STACK(512)
          std     r3,STACK_FRAME_PARAM(0)(sp)     /* vector int *varray */
          std     r4,STACK_FRAME_PARAM(1)(sp)     /* int *threads_starting */
          std     r5,STACK_FRAME_PARAM(2)(sp)     /* int *running */
          /* stvx needs 16-byte aligned slots, so skip STACK_FRAME_LOCAL(3,0) */
          PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)

          bl      load_vmx
          nop

          sync
          /* Atomic decrement of *threads_starting; retry if the reservation is lost */
          ld      r3,STACK_FRAME_PARAM(1)(sp)
  1:      lwarx   r4,0,r3
          addi    r4,r4,-1
          stwcx.  r4,0,r3
          bne-    1b

  2:      ld      r3,STACK_FRAME_PARAM(0)(sp)
          bl      check_vmx
          nop
          cmpdi   r3,0
          bne     3f                      /* mismatch: return the check_vmx result */
          ld      r4,STACK_FRAME_PARAM(2)(sp)
          /*
           * *running is an int, so load a word.  A doubleword load would
           * read four bytes past the object and, on big-endian, leave the
           * wrong word in the half of r5 that cmpwi tests.
           */
          lwz     r5,0(r4)
          cmpwi   r5,0
          bne     2b

  3:      POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
          POP_BASIC_STACK(512)
          blr
  FUNC_END(preempt_vmx)