string_64.S 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. /*
  2. * This program is free software; you can redistribute it and/or modify
  3. * it under the terms of the GNU General Public License as published by
  4. * the Free Software Foundation; either version 2 of the License, or
  5. * (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program; if not, write to the Free Software
  14. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  15. *
  16. * Copyright (C) IBM Corporation, 2012
  17. *
  18. * Author: Anton Blanchard <anton@au.ibm.com>
  19. */
  20. #include <asm/ppc_asm.h>
  21. #include <asm/linkage.h>
  22. #include <asm/asm-offsets.h>
  23. #include <asm/export.h>
  /*
   * TOC slot holding the address of ppc64_caches.  .Llong_clear loads it
   * with "ld r5,PPC64_CACHES@toc(r2)" and then reads the
   * DCACHEL1LOGBLOCKSIZE and DCACHEL1BLOCKSIZE fields through r5.
   */
	.section	".toc","aw"
PPC64_CACHES:
	.tc		ppc64_caches[TC],ppc64_caches

	/* Back to the code section for everything that follows. */
	.section	".text"
/**
 * __clear_user() - Zero a block of memory in user space, with less checking.
 * @to: Destination address, in user space.
 * @n: Number of bytes to zero.
 *
 * Zero a block of memory in user space. Caller must check
 * the specified block with access_ok() before calling this function.
 *
 * Returns number of bytes that could not be cleared.
 * On success, this will be zero.
 */
  /*
   * Fault annotations for the stores in __clear_user.
   *
   * Each macro emits a local numeric label at the point of use and passes
   * that label, together with a handler label, to EX_TABLE().  They are
   * written as "errN; <store>" so the label sits on the store that follows.
   * EX_TABLE() comes from an included header; presumably it records the
   * pair so that a fault at the label resumes at the handler -- confirm
   * against the header.
   *
   *   err1 -> .Ldo_err1
   *   err2 -> .Ldo_err2
   *   err3 -> .Ldo_err3
   */
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm
  /*
   * Handlers named by the err1/err2/err3 macros.  The three entry points
   * fall through into one another:
   *
   *   .Ldo_err1  r3 <- r8, then continue as .Ldo_err2.
   *   .Ldo_err2  store r0 one byte at a time from r3, at most r4 times,
   *              decrementing r4 after every byte that is stored.
   *   .Ldo_err3  return r4 in r3.
   *
   * The byte store is itself tagged err3, so a fault inside the loop goes
   * straight to .Ldo_err3 with r4 holding the bytes not yet stored.  If the
   * loop runs to completion r4 has reached zero and zero is returned.
   */
.Ldo_err1:
	mr	r3,r8			/* rewind to the address saved in r8 */

.Ldo_err2:
	mtctr	r4			/* loop count = bytes outstanding */
.Lfixup_byte_loop:
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	addi	r4,r4,-1
	bdnz	.Lfixup_byte_loop

.Ldo_err3:
	mr	r3,r4			/* return value = bytes left over */
	blr
  /*
   * __clear_user: store zeroes to r4 bytes starting at address r3.
   *
   * Register use, as established by the code below:
   *   r3   destination cursor on entry and while clearing; return value
   *        on exit
   *   r4   bytes still to be cleared
   *   r0   constant zero, the value written by every store
   *   r6   scratch: alignment distance and loop counts
   *   r8   copy of r3 taken before each run of err1 stores (.Ldo_err1
   *        restores r3 from it, so r4 is only adjusted after such a run)
   *   r5, r7, r9, r10   cache block bookkeeping, long path only
   *   r2   base register for the @toc load in .Llong_clear; presumably set
   *        up by _GLOBAL_TOC, which is defined in an included header
   *
   * Paths:
   *   fewer than 32 bytes         .Lshort_clear only
   *   32 bytes or more            align r3 to 8 bytes, then
   *     fewer than 32 bytes left    .Lshort_clear
   *     32..512 bytes left          .Lmedium_clear (32 byte chunks), then
   *                                 .Lshort_clear
   *     more than 512 bytes left    .Llong_clear (dcbz per cache block),
   *                                 then medium/short for the remainder
   *
   * The normal exit returns 0 in r3.  Stores are tagged err1/err2; those
   * macros name .Ldo_err1/.Ldo_err2 as handlers, which return the
   * remaining byte count instead.
   */
_GLOBAL_TOC(__clear_user)
	cmpdi	r4,32
	neg	r6,r3
	li	r0,0
	blt	.Lshort_clear		/* fewer than 32 bytes in total */
	mr	r8,r3
	mtocrf	0x01,r6			/* cr7 = low four bits of -r3 */
	clrldi	r6,r6,(64-3)		/* r6 = bytes up to the next 8 byte boundary */

	/* Get the destination 8 byte aligned */
	bf	cr7*4+3,.Lalign_2
err1;	stb	r0,0(r3)
	addi	r3,r3,1

.Lalign_2:
	bf	cr7*4+2,.Lalign_4
err1;	sth	r0,0(r3)
	addi	r3,r3,2

.Lalign_4:
	bf	cr7*4+1,.Laligned_8
err1;	stw	r0,0(r3)
	addi	r3,r3,4

.Laligned_8:
	sub	r4,r4,r6		/* account for the alignment stores */

	cmpdi	r4,32
	cmpdi	cr1,r4,512
	blt	.Lshort_clear
	bgt	cr1,.Llong_clear	/* more than 512 bytes left */

.Lmedium_clear:
	srdi	r6,r4,5			/* r6 = number of 32 byte chunks */
	mtctr	r6

	/* Do 32 byte chunks; r3 and r4 are kept in step for err2 */
.Lclear_32_loop:
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
err2;	std	r0,16(r3)
err2;	std	r0,24(r3)
	addi	r3,r3,32
	addi	r4,r4,-32
	bdnz	.Lclear_32_loop

.Lshort_clear:
	/* up to 31 bytes to go */
	cmpdi	r4,16
	blt	.Ltail_15
err2;	std	r0,0(r3)
err2;	std	r0,8(r3)
	addi	r3,r3,16
	addi	r4,r4,-16

	/* Up to 15 bytes to go; cr7 selects the 8/4/2/1 byte stores */
.Ltail_15:
	mr	r8,r3
	clrldi	r4,r4,(64-4)
	mtocrf	0x01,r4
	bf	cr7*4+0,.Ltail_4
err1;	std	r0,0(r3)
	addi	r3,r3,8

.Ltail_4:
	bf	cr7*4+1,.Ltail_2
err1;	stw	r0,0(r3)
	addi	r3,r3,4

.Ltail_2:
	bf	cr7*4+2,.Ltail_1
err1;	sth	r0,0(r3)
	addi	r3,r3,2

.Ltail_1:
	bf	cr7*4+3,.Lclear_done
err1;	stb	r0,0(r3)

.Lclear_done:
	li	r3,0			/* everything cleared */
	blr

.Llong_clear:
	ld	r5,PPC64_CACHES@toc(r2)

	/*
	 * cr7 still holds the low bits of -r3 from function entry; if the
	 * "8" bit is set, one more doubleword gets r3 to 16 byte alignment.
	 */
	bf	cr7*4+0,.Laligned_16
err2;	std	r0,0(r3)
	addi	r3,r3,8
	addi	r4,r4,-8

	/* Destination is 16 byte aligned, need to get it cache block aligned */
.Laligned_16:
	lwz	r7,DCACHEL1LOGBLOCKSIZE(r5)
	lwz	r9,DCACHEL1BLOCKSIZE(r5)

	/*
	 * With worst case alignment the long clear loop takes a minimum
	 * of 1 byte less than 2 cachelines.  The check below falls back
	 * to the medium path when fewer than 4 * r9 bytes remain.
	 */
	sldi	r10,r9,2
	cmpd	r4,r10
	blt	.Lmedium_clear

	neg	r6,r3
	addi	r10,r9,-1		/* r10 = block size - 1, used as a mask */
	and.	r5,r6,r10		/* r5 = bytes up to the next block boundary */
	beq	.Ldcbz_setup		/* already block aligned */

	srdi	r6,r5,4			/* 16 bytes per iteration */
	mtctr	r6
	mr	r8,r3
.Lclear_16_loop:
err1;	std	r0,0(r3)
err1;	std	r0,8(r3)
	addi	r3,r3,16
	bdnz	.Lclear_16_loop

	sub	r4,r4,r5

.Ldcbz_setup:
	srd	r6,r4,r7		/* r6 = r4 >> r7: whole blocks to clear */
	mtctr	r6
	mr	r8,r3
.Ldcbz_loop:
err1;	dcbz	0,r3
	add	r3,r3,r9
	bdnz	.Ldcbz_loop

	and	r4,r4,r10		/* bytes left after the last whole block */

	cmpdi	r4,32
	blt	.Lshort_clear
	b	.Lmedium_clear
EXPORT_SYMBOL(__clear_user)