/*
 * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com>
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */

#include <linux/linkage.h>
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>

/*
 * copy_user_generic_unrolled - memory copy with exception handling.
 * This version is for CPUs like P4 that don't have efficient microcode
 * for rep movsq.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
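
/*
 * For illustration only: a rough C sketch of the copy strategy used below
 * (made-up name, not a kernel API).  It leaves out STAC/CLAC, the
 * destination-alignment prologue and, most importantly, the exception
 * fixups that make the real function return the number of uncopied bytes:
 *
 *      static unsigned long copy_unrolled_sketch(unsigned char *dst,
 *                                                const unsigned char *src,
 *                                                unsigned long len)
 *      {
 *              while (len >= 64) {             // unrolled 64-byte blocks
 *                      unsigned int i;
 *                      for (i = 0; i < 8; i++)
 *                              ((unsigned long *)dst)[i] =
 *                                      ((const unsigned long *)src)[i];
 *                      dst += 64; src += 64; len -= 64;
 *              }
 *              while (len >= 8) {              // leftover quadwords
 *                      *(unsigned long *)dst = *(const unsigned long *)src;
 *                      dst += 8; src += 8; len -= 8;
 *              }
 *              while (len--)                   // trailing bytes
 *                      *dst++ = *src++;
 *              return 0;                       // 0 == everything copied
 *      }
 */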
ENTRY(copy_user_generic_unrolled)
        ASM_STAC
        cmpl $8,%edx
        jb 20f                  /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz 17f
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movq %r8,(%rdi)
6:      movq %r9,1*8(%rdi)
7:      movq %r10,2*8(%rdi)
8:      movq %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movq %r8,4*8(%rdi)
14:     movq %r9,5*8(%rdi)
15:     movq %r10,6*8(%rdi)
16:     movq %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz 1b
17:     movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz 20f
18:     movq (%rsi),%r8
19:     movq %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz 18b
20:     andl %edx,%edx
        jz 23f
        movl %edx,%ecx
21:     movb (%rsi),%al
22:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz 21b
23:     xor %eax,%eax
        ASM_CLAC
        ret
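
        /*
         * Exception fixups: each label computes the number of bytes that
         * were not copied and tail-calls copy_user_handle_tail, which
         * handles the remainder and returns the uncopied count in %eax.
         *   30: fault in the 64-byte loop -> remaining = 64*%ecx + %edx
         *   40: fault in the 8-byte loop  -> remaining =  8*%ecx + %edx
         *   50: fault in the byte loop    -> remaining = %ecx
         */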
        .section .fixup,"ax"
30:     shll $6,%ecx
        addl %ecx,%edx
        jmp 60f
40:     leal (%rdx,%rcx,8),%edx
        jmp 60f
50:     movl %ecx,%edx
60:     jmp copy_user_handle_tail /* ecx is zerorest also */
        .previous

        _ASM_EXTABLE(1b,30b)
        _ASM_EXTABLE(2b,30b)
        _ASM_EXTABLE(3b,30b)
        _ASM_EXTABLE(4b,30b)
        _ASM_EXTABLE(5b,30b)
        _ASM_EXTABLE(6b,30b)
        _ASM_EXTABLE(7b,30b)
        _ASM_EXTABLE(8b,30b)
        _ASM_EXTABLE(9b,30b)
        _ASM_EXTABLE(10b,30b)
        _ASM_EXTABLE(11b,30b)
        _ASM_EXTABLE(12b,30b)
        _ASM_EXTABLE(13b,30b)
        _ASM_EXTABLE(14b,30b)
        _ASM_EXTABLE(15b,30b)
        _ASM_EXTABLE(16b,30b)
        _ASM_EXTABLE(18b,40b)
        _ASM_EXTABLE(19b,40b)
        _ASM_EXTABLE(21b,50b)
        _ASM_EXTABLE(22b,50b)
ENDPROC(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)

/*
 * Some CPUs run faster using the string copy instructions.
 * This is also a lot simpler. Use them when possible.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this, please take those errata into account.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
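
/*
 * For illustration only: roughly what the rep sequences below do, as C
 * (made-up name, no fault handling, which is exactly what the real code
 * adds via the exception table):
 *
 *      static unsigned long copy_string_sketch(unsigned char *dst,
 *                                              const unsigned char *src,
 *                                              unsigned long len)
 *      {
 *              unsigned long quads = len >> 3;         // rep movsq count
 *              unsigned long rest  = len & 7;          // rep movsb count
 *
 *              while (quads--) {
 *                      *(unsigned long *)dst = *(const unsigned long *)src;
 *                      dst += 8; src += 8;
 *              }
 *              while (rest--)
 *                      *dst++ = *src++;
 *              return 0;
 *      }
 */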
ENTRY(copy_user_generic_string)
        ASM_STAC
        cmpl $8,%edx
        jb 2f                   /* less than 8 bytes, go to byte copy loop */
        ALIGN_DESTINATION
        movl %edx,%ecx
        shrl $3,%ecx
        andl $7,%edx
1:      rep
        movsq
2:      movl %edx,%ecx
3:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret
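
        /*
         * Exception fixups: during rep movsq, %rcx counts the quadwords not
         * yet moved, so label 11 rebuilds the byte count as 8*%rcx + %rdx;
         * during rep movsb, %ecx already holds the bytes not yet moved.
         * Either way the count ends up in %edx for copy_user_handle_tail.
         */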
        .section .fixup,"ax"
11:     leal (%rdx,%rcx,8),%ecx
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,11b)
        _ASM_EXTABLE(3b,12b)
ENDPROC(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)

/*
 * Some CPUs support enhanced REP MOVSB/STOSB instructions.
 * It is recommended to use enhanced REP MOVSB/STOSB when it is available.
 *
 * Input:
 * rdi destination
 * rsi source
 * rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 */
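/*
 * Note: callers are generally expected to pick this variant only when the
 * CPU advertises X86_FEATURE_ERMS (enhanced REP MOVSB/STOSB); with ERMS a
 * single rep movsb over the whole count is efficient for both small and
 * large copies.
 */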
ENTRY(copy_user_enhanced_fast_string)
        ASM_STAC
        movl %edx,%ecx
1:      rep
        movsb
        xorl %eax,%eax
        ASM_CLAC
        ret
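
        /*
         * Exception fixup: during rep movsb, %rcx counts the bytes not yet
         * moved, so it is handed straight to copy_user_handle_tail as the
         * remaining length.
         */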
        .section .fixup,"ax"
12:     movl %ecx,%edx          /* ecx is zerorest also */
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,12b)
ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)

/*
 * copy_user_nocache - Uncached memory copy with exception handling.
 * This keeps the destination out of the CPU caches (non-temporal stores)
 * for better performance when the copied data will not be read back soon.
 *
 * Note: Cached memory copy is used when destination or size is not
 * naturally aligned. That is:
 *  - Require 8-byte alignment when size is 8 bytes or larger.
 *  - Require 4-byte alignment when size is 4 bytes.
 */
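
/*
 * For illustration only: a simplified C sketch of the non-temporal copy
 * below, using compiler intrinsics in place of movnti/sfence (made-up
 * name; it assumes an 8-byte-aligned destination and skips the 4-byte
 * path, the alignment prologue and all exception fixups):
 *
 *      #include <emmintrin.h>  // _mm_stream_si64(), _mm_sfence()
 *
 *      static unsigned long copy_nocache_sketch(unsigned char *dst,
 *                                               const unsigned char *src,
 *                                               unsigned long len)
 *      {
 *              while (len >= 8) {      // 8-byte non-temporal stores
 *                      _mm_stream_si64((long long *)dst,
 *                                      *(const long long *)src);
 *                      dst += 8; src += 8; len -= 8;
 *              }
 *              while (len--)           // cached byte copy for the tail
 *                      *dst++ = *src++;
 *              _mm_sfence();           // order the non-temporal stores
 *              return 0;
 *      }
 */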
ENTRY(__copy_user_nocache)
        ASM_STAC

        /* If size is less than 8 bytes, go to 4-byte copy */
        cmpl $8,%edx
        jb .L_4b_nocache_copy_entry

        /* If destination is not 8-byte aligned, "cache" copy to align it */
        ALIGN_DESTINATION

        /* Set 4x8-byte copy count and remainder */
        movl %edx,%ecx
        andl $63,%edx
        shrl $6,%ecx
        jz .L_8b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 4x8-byte nocache loop-copy */
.L_4x8b_nocache_copy_loop:
1:      movq (%rsi),%r8
2:      movq 1*8(%rsi),%r9
3:      movq 2*8(%rsi),%r10
4:      movq 3*8(%rsi),%r11
5:      movnti %r8,(%rdi)
6:      movnti %r9,1*8(%rdi)
7:      movnti %r10,2*8(%rdi)
8:      movnti %r11,3*8(%rdi)
9:      movq 4*8(%rsi),%r8
10:     movq 5*8(%rsi),%r9
11:     movq 6*8(%rsi),%r10
12:     movq 7*8(%rsi),%r11
13:     movnti %r8,4*8(%rdi)
14:     movnti %r9,5*8(%rdi)
15:     movnti %r10,6*8(%rdi)
16:     movnti %r11,7*8(%rdi)
        leaq 64(%rsi),%rsi
        leaq 64(%rdi),%rdi
        decl %ecx
        jnz .L_4x8b_nocache_copy_loop

        /* Set 8-byte copy count and remainder */
.L_8b_nocache_copy_entry:
        movl %edx,%ecx
        andl $7,%edx
        shrl $3,%ecx
        jz .L_4b_nocache_copy_entry     /* jump if count is 0 */

        /* Perform 8-byte nocache loop-copy */
.L_8b_nocache_copy_loop:
20:     movq (%rsi),%r8
21:     movnti %r8,(%rdi)
        leaq 8(%rsi),%rsi
        leaq 8(%rdi),%rdi
        decl %ecx
        jnz .L_8b_nocache_copy_loop

        /* If no bytes are left, we're done */
.L_4b_nocache_copy_entry:
        andl %edx,%edx
        jz .L_finish_copy

        /* If destination is not 4-byte aligned, go to byte copy: */
        movl %edi,%ecx
        andl $3,%ecx
        jnz .L_1b_cache_copy_entry

        /* Set 4-byte copy count (1 or 0) and remainder */
        movl %edx,%ecx
        andl $3,%edx
        shrl $2,%ecx
        jz .L_1b_cache_copy_entry       /* jump if count is 0 */

        /* Perform 4-byte nocache copy: */
30:     movl (%rsi),%r8d
31:     movnti %r8d,(%rdi)
        leaq 4(%rsi),%rsi
        leaq 4(%rdi),%rdi

        /* If no bytes left, we're done: */
        andl %edx,%edx
        jz .L_finish_copy

        /* Perform byte "cache" loop-copy for the remainder */
.L_1b_cache_copy_entry:
        movl %edx,%ecx
.L_1b_cache_copy_loop:
40:     movb (%rsi),%al
41:     movb %al,(%rdi)
        incq %rsi
        incq %rdi
        decl %ecx
        jnz .L_1b_cache_copy_loop

        /* Finished copying; fence the prior stores */
.L_finish_copy:
        xorl %eax,%eax
        ASM_CLAC
        sfence
        ret
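
        /*
         * Exception fixups: compute how many bytes were not copied and hand
         * the count to copy_user_handle_tail.  The sfence in the common tail
         * makes sure the non-temporal stores that did complete are ordered
         * before any later stores.
         *   .L_fixup_4x8b_copy: remaining = 64*%ecx + %edx
         *   .L_fixup_8b_copy:   remaining =  8*%ecx + %edx
         *   .L_fixup_4b_copy:   remaining =  4*%ecx + %edx
         *   .L_fixup_1b_copy:   remaining = %ecx
         */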
        .section .fixup,"ax"
.L_fixup_4x8b_copy:
        shll $6,%ecx
        addl %ecx,%edx
        jmp .L_fixup_handle_tail
.L_fixup_8b_copy:
        lea (%rdx,%rcx,8),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_4b_copy:
        lea (%rdx,%rcx,4),%rdx
        jmp .L_fixup_handle_tail
.L_fixup_1b_copy:
        movl %ecx,%edx
.L_fixup_handle_tail:
        sfence
        jmp copy_user_handle_tail
        .previous

        _ASM_EXTABLE(1b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(2b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(3b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(4b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(5b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(6b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(7b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(8b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(9b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(10b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(11b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(12b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(13b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(14b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(15b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(16b,.L_fixup_4x8b_copy)
        _ASM_EXTABLE(20b,.L_fixup_8b_copy)
        _ASM_EXTABLE(21b,.L_fixup_8b_copy)
        _ASM_EXTABLE(30b,.L_fixup_4b_copy)
        _ASM_EXTABLE(31b,.L_fixup_4b_copy)
        _ASM_EXTABLE(40b,.L_fixup_1b_copy)
        _ASM_EXTABLE(41b,.L_fixup_1b_copy)
ENDPROC(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)