copyuser_64.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644
  1. /*
  2. * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. */
  9. #include <asm/processor.h>
  10. #include <asm/ppc_asm.h>
  11. #include <asm/export.h>
  12. #include <asm/asm-compat.h>
  /*
   * sLd / sHd name a doubleword shift by the direction in memory in which
   * the bytes move, rather than by left/right. The underlying instruction
   * (sld or srd) is swapped according to the byte order, so the merge code
   * in .Lsrc_unaligned is written once for both endiannesses.
   */
  13. #ifdef __BIG_ENDIAN__
  14. #define sLd sld /* Shift towards low-numbered address. */
  15. #define sHd srd /* Shift towards high-numbered address. */
  16. #else
  17. #define sLd srd /* Shift towards low-numbered address. */
  18. #define sHd sld /* Shift towards high-numbered address. */
  19. #endif
  /*
   * __copy_tofrom_user / __copy_tofrom_user_base
   *
   * On entry: r3 = destination, r4 = source, r5 = byte count.
   * On exit:  r3 = 0 when every byte was copied, otherwise the number of
   *           bytes not copied (computed by the fixup handlers further down).
   *
   * The three arguments are saved at -24/-16/-8(r1) so that the fixup
   * handlers can work out how far the copy got. cr7 holds the low four
   * bits of the remaining length and drives the 8/4/2/1-byte tail copies.
   * A numeric label marks each load and store that has an EX_TABLE entry.
   */
  20. .align 7 /* NOTE(review): presumably 2^7 = 128-byte alignment on this target */
  21. _GLOBAL_TOC(__copy_tofrom_user)
  22. #ifdef CONFIG_PPC_BOOK3S_64
  /*
   * Feature-patched entry keyed on CPU_FTR_VMX_COPY: either the nop or the
   * branch to __copy_tofrom_user_power7 is kept. NOTE(review): the section
   * macros are defined outside this file. Presumably the nop is kept when
   * the feature bit is clear and the branch when it is set - confirm
   * against the feature-fixup macro definitions.
   */
  23. BEGIN_FTR_SECTION
  24. nop
  25. FTR_SECTION_ELSE
  26. b __copy_tofrom_user_power7
  27. ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
  28. #endif
  29. _GLOBAL(__copy_tofrom_user_base)
  30. /* first check for a whole page copy on a page boundary */
  31. cmpldi cr1,r5,16 /* cr1: len compared (unsigned) with 16 */
  32. cmpdi cr6,r5,4096 /* cr6: len compared with 4096 */
  33. or r0,r3,r4
  34. neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
  35. andi. r0,r0,4095 /* cr0.eq when dest and src are both 4096-byte aligned */
  36. std r3,-24(r1) /* save original dest for the fixup handlers */
  37. crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = cr0.eq AND (len == 4096) */
  38. std r4,-16(r1) /* save original src */
  39. std r5,-8(r1) /* save original len */
  40. dcbt 0,r4 /* cache-touch hint on the start of the source */
  41. beq .Lcopy_page_4K /* aligned whole page: use the unrolled page copy */
  42. andi. r6,r6,7 /* r6 = bytes up to the next 8-byte dest boundary, cr0.eq if none */
  43. PPC_MTOCRF(0x01,r5) /* low 4 bits of len into cr7 (8/4/2/1 tail flags) */
  44. blt cr1,.Lshort_copy /* len < 16: word/halfword/byte copy only */
  45. /* Below we want to nop out the bne if we're on a CPU that has the
  46. * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
  47. * cleared.
  48. * At the time of writing the only CPU that has this combination of bits
  49. * set is Power6.
  50. */
  51. BEGIN_FTR_SECTION
  52. nop
  53. FTR_SECTION_ELSE
  54. bne .Ldst_unaligned
  55. ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
  56. CPU_FTR_UNALIGNED_LD_STD)
  /*
   * Destination is 8-byte aligned from here on (or the CPU does not care).
   * Also the re-entry point used by .Ldst_unaligned and by the page-copy
   * fixup handler.
   */
  57. .Ldst_aligned:
  58. addi r3,r3,-16 /* bias dest by -16, undone in the loop setup or at .Ldo_tail */
  /*
   * NOTE(review): presumably this source-alignment test is only kept on
   * CPUs without CPU_FTR_UNALIGNED_LD_STD - confirm against the macro.
   * r0 (source misalignment) is consumed by .Lsrc_unaligned.
   */
  59. BEGIN_FTR_SECTION
  60. andi. r0,r4,7 /* r0 = source offset within a doubleword */
  61. bne .Lsrc_unaligned
  62. END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
  63. blt cr1,.Ldo_tail /* if < 16 bytes to copy */
  64. srdi r0,r5,5 /* r0 = number of 32-byte loop iterations */
  65. cmpdi cr1,r0,0 /* cr1.eq when there is no full 32-byte iteration */
  66. 20: ld r7,0(r4)
  67. 220: ld r6,8(r4)
  68. addi r4,r4,16
  69. mtctr r0
  70. andi. r0,r5,0x10 /* is there an odd 16-byte chunk? */
  71. beq 22f /* no: join the loop half-way with r7/r6 still to be stored */
  72. addi r3,r3,16 /* yes: undo the dest bias ... */
  73. addi r4,r4,-16 /* ... and the source advance */
  74. mr r9,r7 /* the 16 loaded bytes become the pending r9/r8 pair */
  75. mr r8,r6
  76. beq cr1,72f /* no 32-byte iteration: just store the pending 16 bytes */
  /* Main loop: 32 bytes per iteration, loads running ahead of the stores. */
  77. 21: ld r7,16(r4)
  78. 221: ld r6,24(r4)
  79. addi r4,r4,32
  80. 70: std r9,0(r3)
  81. 270: std r8,8(r3)
  82. 22: ld r9,0(r4)
  83. 222: ld r8,8(r4)
  84. 71: std r7,16(r3)
  85. 271: std r6,24(r3)
  86. addi r3,r3,32
  87. bdnz 21b
  88. 72: std r9,0(r3) /* store the last pending 16 bytes */
  89. 272: std r8,8(r3)
  90. andi. r5,r5,0xf /* bytes left over below 16 */
  91. beq+ 3f /* none: return 0 */
  92. addi r4,r4,16
  /* Tail: copy 8, 4, 2 and 1 bytes as selected by the cr7 bits. */
  93. .Ldo_tail:
  94. addi r3,r3,16 /* remove the -16 bias */
  95. bf cr7*4+0,246f /* skip unless the 8 bit of the length is set */
  96. 244: ld r9,0(r4)
  97. addi r4,r4,8
  98. 245: std r9,0(r3)
  99. addi r3,r3,8
  100. 246: bf cr7*4+1,1f /* skip unless the 4 bit is set */
  101. 23: lwz r9,0(r4)
  102. addi r4,r4,4
  103. 73: stw r9,0(r3)
  104. addi r3,r3,4
  105. 1: bf cr7*4+2,2f /* skip unless the 2 bit is set */
  106. 44: lhz r9,0(r4)
  107. addi r4,r4,2
  108. 74: sth r9,0(r3)
  109. addi r3,r3,2
  110. 2: bf cr7*4+3,3f /* skip unless the 1 bit is set */
  111. 45: lbz r9,0(r4)
  112. 75: stb r9,0(r3)
  113. 3: li r3,0 /* success: 0 bytes not copied */
  114. blr
  /*
   * Source is not 8-byte aligned (r0 = source offset within a doubleword,
   * set by the andi. just before the branch here). r3 still carries the
   * -16 bias. The source pointer is rounded down to a doubleword boundary
   * and every destination doubleword is assembled from two neighbouring
   * source doublewords: one shifted with sLd by r10 bits, the next shifted
   * with sHd by r11 = 64 - r10 bits, then ORed together.
   */
  115. .Lsrc_unaligned:
  116. srdi r6,r5,3 /* r6 = len / 8 (whole doublewords) */
  117. addi r5,r5,-16
  118. subf r4,r0,r4 /* round the source pointer down by the misalignment */
  119. srdi r7,r5,4 /* r7 = (len - 16) / 16, the loop count */
  120. sldi r10,r0,3 /* r10 = misalignment in bits */
  121. cmpldi cr6,r6,3 /* cr6: doubleword count compared with 3 */
  122. andi. r5,r5,7 /* r5 = bytes below a doubleword, cr0 tested at "bne 6f" */
  123. mtctr r7
  124. subfic r11,r10,64 /* r11 = 64 - r10 */
  125. add r5,r5,r0 /* leftover bytes + misalignment, compared with 8 at label 6 */
  126. bt cr7*4+0,28f /* 8 bit of the length set: take the second entry sequence */
  127. 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
  128. 25: ld r0,8(r4)
  129. sLd r6,r9,r10
  130. 26: ldu r9,16(r4)
  131. sHd r7,r0,r11
  132. sLd r8,r0,r10
  133. or r7,r7,r6
  134. blt cr6,79f /* fewer than 3 doublewords: go straight to the final stores */
  135. 27: ld r0,8(r4)
  136. b 2f
  137. 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
  138. 29: ldu r9,8(r4)
  139. sLd r8,r0,r10
  140. addi r3,r3,-8
  141. blt cr6,5f /* fewer than 3 doublewords: only the last store remains */
  142. 30: ld r0,8(r4)
  143. sHd r12,r9,r11
  144. sLd r6,r9,r10
  145. 31: ldu r9,16(r4)
  146. or r12,r8,r12
  147. sHd r7,r0,r11
  148. sLd r8,r0,r10
  149. addi r3,r3,16
  150. beq cr6,78f /* exactly 3 doublewords: skip the loop */
  /* Main loop: two merged doublewords (16 bytes) stored per iteration. */
  151. 1: or r7,r7,r6
  152. 32: ld r0,8(r4)
  153. 76: std r12,8(r3)
  154. 2: sHd r12,r9,r11
  155. sLd r6,r9,r10
  156. 33: ldu r9,16(r4)
  157. or r12,r8,r12
  158. 77: stdu r7,16(r3)
  159. sHd r7,r0,r11
  160. sLd r8,r0,r10
  161. bdnz 1b
  /* Drain the doublewords that are still held in registers. */
  162. 78: std r12,8(r3)
  163. or r7,r7,r6
  164. 79: std r7,16(r3)
  165. 5: sHd r12,r9,r11
  166. or r12,r8,r12
  167. 80: std r12,24(r3)
  168. bne 6f /* cr0 from "andi. r5,r5,7" above: sub-doubleword bytes remain */
  169. li r3,0 /* nothing left: return 0 */
  170. blr
  171. 6: cmpwi cr1,r5,8 /* do the leftover bytes reach into one more source doubleword? */
  172. addi r3,r3,32 /* r3 now addresses the first byte still to be written */
  173. sLd r9,r9,r10
  174. ble cr1,7f /* no: r9 already holds every remaining byte */
  175. 34: ld r0,8(r4)
  176. sHd r7,r0,r11
  177. or r9,r7,r9
  178. 7:
  /*
   * Store the last 4/2/1 bytes out of r9 as selected by cr7. stw, sth and
   * stb write the low-order bits of r9, so r9 is rotated to bring the next
   * bytes into place: before each store on big-endian, after it on
   * little-endian.
   */
  179. bf cr7*4+1,1f
  180. #ifdef __BIG_ENDIAN__
  181. rotldi r9,r9,32
  182. #endif
  183. 94: stw r9,0(r3)
  184. #ifdef __LITTLE_ENDIAN__
  185. rotrdi r9,r9,32
  186. #endif
  187. addi r3,r3,4
  188. 1: bf cr7*4+2,2f
  189. #ifdef __BIG_ENDIAN__
  190. rotldi r9,r9,16
  191. #endif
  192. 95: sth r9,0(r3)
  193. #ifdef __LITTLE_ENDIAN__
  194. rotrdi r9,r9,16
  195. #endif
  196. addi r3,r3,2
  197. 2: bf cr7*4+3,3f
  198. #ifdef __BIG_ENDIAN__
  199. rotldi r9,r9,8
  200. #endif
  201. 96: stb r9,0(r3)
  202. #ifdef __LITTLE_ENDIAN__
  203. rotrdi r9,r9,8
  204. #endif
  205. 3: li r3,0 /* success: 0 bytes not copied */
  206. blr
  /*
   * Destination is not 8-byte aligned: copy r6 (1 to 7) bytes as 1 + 2 + 4
   * byte pieces so that the destination reaches a doubleword boundary, then
   * rejoin .Ldst_aligned with the reduced length. r7 is the running offset
   * of the bytes copied so far. The fixup handlers for labels 36/37 and
   * 82/83 add r7 to r3 to find how far the copy got.
   */
  207. .Ldst_unaligned:
  208. PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
  209. subf r5,r6,r5 /* r5 = length left after the alignment bytes */
  210. li r7,0
  211. cmpldi cr1,r5,16 /* refresh cr1 for the "blt cr1,.Ldo_tail" test in .Ldst_aligned */
  212. bf cr7*4+3,1f /* 1 bit clear: no single byte to copy */
  213. 35: lbz r0,0(r4)
  214. 81: stb r0,0(r3)
  215. addi r7,r7,1
  216. 1: bf cr7*4+2,2f /* 2 bit clear: no halfword to copy */
  217. 36: lhzx r0,r7,r4
  218. 82: sthx r0,r7,r3
  219. addi r7,r7,2
  220. 2: bf cr7*4+1,3f /* 4 bit clear: no word to copy */
  221. 37: lwzx r0,r7,r4
  222. 83: stwx r0,r7,r3
  223. 3: PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of the remaining length again */
  224. add r4,r6,r4 /* step both pointers past the alignment bytes */
  225. add r3,r6,r3
  226. b .Ldst_aligned
  /*
   * Copies of fewer than 16 bytes (reached through "blt cr1" at entry).
   * cr7 holds the low four bits of the length. Each set bit selects one
   * piece: 8 bytes (as two words), 4, 2 and 1. Only word, halfword and
   * byte accesses are used. No alignment fix-up is done on this path.
   */
  227. .Lshort_copy:
  228. bf cr7*4+0,1f /* 8 bit clear: skip the two-word copy */
  229. 38: lwz r0,0(r4)
  230. 39: lwz r9,4(r4)
  231. addi r4,r4,8
  232. 84: stw r0,0(r3)
  233. 85: stw r9,4(r3)
  234. addi r3,r3,8
  235. 1: bf cr7*4+1,2f /* 4 bit clear: skip the word copy */
  236. 40: lwz r0,0(r4)
  237. addi r4,r4,4
  238. 86: stw r0,0(r3)
  239. addi r3,r3,4
  240. 2: bf cr7*4+2,3f /* 2 bit clear: skip the halfword copy */
  241. 41: lhz r0,0(r4)
  242. addi r4,r4,2
  243. 87: sth r0,0(r3)
  244. addi r3,r3,2
  245. 3: bf cr7*4+3,4f /* 1 bit clear: skip the byte copy */
  246. 42: lbz r0,0(r4)
  247. 88: stb r0,0(r3)
  248. 4: li r3,0 /* success: 0 bytes not copied */
  249. blr
  250. /*
  251. * exception handlers follow
  252. * we have to return the number of bytes not copied
  253. * for an exception on a load, we retry the rest of the copy one byte at a
  * time and return the count that still could not be copied. Nothing in
  * this file writes zeroes to the uncopied part of the destination.
  *
  * The handler label for an instruction is the instruction label plus 100.
  * The labels form a fall-through chain: entering higher up accumulates
  * more "addi r3" adjustments, which make up for the distance by which r3
  * lags behind the first unmodified destination byte at the faulting
  * instruction.
  254. */
  255. 136:
  256. 137:
  257. add r3,r3,r7 /* .Ldst_unaligned loads: r7 = bytes already copied there */
  258. b 1f
  259. 130:
  260. 131:
  261. addi r3,r3,8 /* entering here adds 24 in total */
  262. 120:
  263. 320:
  264. 122:
  265. 322:
  266. 124:
  267. 125:
  268. 126:
  269. 127:
  270. 128:
  271. 129:
  272. 133:
  273. addi r3,r3,8 /* entering here adds 16 in total */
  274. 132:
  275. addi r3,r3,8 /* entering here adds 8 */
  /* the labels below need no adjustment of r3 */
  276. 121:
  277. 321:
  278. 344:
  279. 134:
  280. 135:
  281. 138:
  282. 139:
  283. 140:
  284. 141:
  285. 142:
  286. 123:
  287. 144:
  288. 145:
  289. /*
  290. * here we have had a fault on a load and r3 points to the first
  291. * unmodified byte of the destination
  292. */
  293. 1: ld r6,-24(r1) /* r6 = original dest */
  294. ld r4,-16(r1) /* r4 = original src */
  295. ld r5,-8(r1) /* r5 = original len */
  296. subf r6,r6,r3 /* r6 = bytes copied so far */
  297. add r4,r4,r6 /* r4 = matching source address */
  298. subf r5,r6,r5 /* #bytes left to go */
  299. /*
  300. * first see if we can copy any more bytes before hitting another exception
  301. */
  302. mtctr r5
  303. 43: lbz r0,0(r4)
  304. addi r4,r4,1
  305. 89: stb r0,0(r3)
  306. addi r3,r3,1
  307. bdnz 43b
  308. li r3,0 /* huh? all copied successfully this time? */
  309. blr
  310. /*
  311. * here we have trapped again, amount remaining is in ctr.
  312. */
  313. 143: mfctr r3
  314. blr
  315. /*
  316. * exception handlers for stores: we just need to work
  317. * out how many bytes weren't copied
  318. */
  319. 182:
  320. 183:
  321. add r3,r3,r7 /* .Ldst_unaligned stores: r7 = bytes already copied there */
  322. b 1f
  323. 371:
  324. 180:
  325. addi r3,r3,8 /* entering here adds 24 in total */
  326. 171:
  327. 177:
  328. 179:
  329. addi r3,r3,8 /* entering here adds 16 in total */
  330. 370:
  331. 372:
  332. 176:
  333. 178:
  334. addi r3,r3,4 /* entering here adds 8 in total */
  335. 185:
  336. addi r3,r3,4 /* entering here adds 4 */
  /* the labels below need no adjustment of r3 */
  337. 170:
  338. 172:
  339. 345:
  340. 173:
  341. 174:
  342. 175:
  343. 181:
  344. 184:
  345. 186:
  346. 187:
  347. 188:
  348. 189:
  349. 194:
  350. 195:
  351. 196:
  352. 1:
  353. ld r6,-24(r1) /* r6 = original dest */
  354. ld r5,-8(r1) /* r5 = original len */
  355. add r6,r6,r5 /* r6 = one past the end of the destination */
  356. subf r3,r3,r6 /* #bytes not copied */
  357. blr
  /*
   * Exception table for the generic copy paths. Each entry pairs the label
   * of a load or store with the label of its fixup handler (instruction
   * label + 100). NOTE(review): EX_TABLE is defined outside this file.
   *
   * The "b" suffix picks the nearest earlier definition of a numeric label.
   * The page-copy routine further down reuses labels 20 to 91, so these
   * entries have to stay ahead of it to bind to the labels above.
   */
  358. EX_TABLE(20b,120b)
  359. EX_TABLE(220b,320b)
  360. EX_TABLE(21b,121b)
  361. EX_TABLE(221b,321b)
  362. EX_TABLE(70b,170b)
  363. EX_TABLE(270b,370b)
  364. EX_TABLE(22b,122b)
  365. EX_TABLE(222b,322b)
  366. EX_TABLE(71b,171b)
  367. EX_TABLE(271b,371b)
  368. EX_TABLE(72b,172b)
  369. EX_TABLE(272b,372b)
  370. EX_TABLE(244b,344b)
  371. EX_TABLE(245b,345b)
  372. EX_TABLE(23b,123b)
  373. EX_TABLE(73b,173b)
  374. EX_TABLE(44b,144b)
  375. EX_TABLE(74b,174b)
  376. EX_TABLE(45b,145b)
  377. EX_TABLE(75b,175b)
  378. EX_TABLE(24b,124b)
  379. EX_TABLE(25b,125b)
  380. EX_TABLE(26b,126b)
  381. EX_TABLE(27b,127b)
  382. EX_TABLE(28b,128b)
  383. EX_TABLE(29b,129b)
  384. EX_TABLE(30b,130b)
  385. EX_TABLE(31b,131b)
  386. EX_TABLE(32b,132b)
  387. EX_TABLE(76b,176b)
  388. EX_TABLE(33b,133b)
  389. EX_TABLE(77b,177b)
  390. EX_TABLE(78b,178b)
  391. EX_TABLE(79b,179b)
  392. EX_TABLE(80b,180b)
  393. EX_TABLE(34b,134b)
  394. EX_TABLE(94b,194b)
  395. EX_TABLE(95b,195b)
  396. EX_TABLE(96b,196b)
  397. EX_TABLE(35b,135b)
  398. EX_TABLE(81b,181b)
  399. EX_TABLE(36b,136b)
  400. EX_TABLE(82b,182b)
  401. EX_TABLE(37b,137b)
  402. EX_TABLE(83b,183b)
  403. EX_TABLE(38b,138b)
  404. EX_TABLE(39b,139b)
  405. EX_TABLE(84b,184b)
  406. EX_TABLE(85b,185b)
  407. EX_TABLE(40b,140b)
  408. EX_TABLE(86b,186b)
  409. EX_TABLE(41b,141b)
  410. EX_TABLE(87b,187b)
  411. EX_TABLE(42b,142b)
  412. EX_TABLE(88b,188b)
  /* the byte-retry loop inside the load fixup path is itself covered */
  413. EX_TABLE(43b,143b)
  414. EX_TABLE(89b,189b)
  415. /*
  416. * Routine to copy a whole page of data, optimized for POWER4.
  417. * On POWER4 it is more than 50% faster than the simple loop
  418. * above (following the .Ldst_aligned label).
  419. */
  /*
   * Reached only when dest and src are both 4096-byte aligned and the
   * length is exactly 4096 (the crand/beq test at entry).
   *
   * r20-r31 are saved at -32..-120(r1) and restored before returning. The
   * stack pointer itself is not moved. Register operands written as bare
   * numbers here (1, 3, 4) are r1, r3 and r4.
   *
   * Layout of the copy:
   *   - 5 outer passes (label 0), each copying 768 bytes as six streams
   *     that start 128 bytes apart. The inner loop (label 1, 5 iterations)
   *     moves 24 bytes per stream, and one more doubleword per stream is
   *     stored after it, giving 128 bytes per stream.
   *   - the remaining 256 bytes are copied 32 bytes at a time (label 3,
   *     7 iterations, plus a final 32 bytes at label 4).
   */
  420. .Lcopy_page_4K:
  421. std r31,-32(1)
  422. std r30,-40(1)
  423. std r29,-48(1)
  424. std r28,-56(1)
  425. std r27,-64(1)
  426. std r26,-72(1)
  427. std r25,-80(1)
  428. std r24,-88(1)
  429. std r23,-96(1)
  430. std r22,-104(1)
  431. std r21,-112(1)
  432. std r20,-120(1)
  433. li r5,4096/32 - 1 /* r5 = 127: number of 32-byte units, minus one */
  434. addi r3,r3,-8 /* bias dest by -8: the stores below start at offset 8 */
  435. li r0,5 /* inner loop count */
  436. 0: addi r5,r5,-24 /* one outer pass = 768 bytes = 24 units of 32 */
  437. mtctr r0
  /* prime the pipeline: first two doublewords of each of the six streams */
  438. 20: ld r22,640(4)
  439. 21: ld r21,512(4)
  440. 22: ld r20,384(4)
  441. 23: ld r11,256(4)
  442. 24: ld r9,128(4)
  443. 25: ld r7,0(4)
  444. 26: ld r25,648(4)
  445. 27: ld r24,520(4)
  446. 28: ld r23,392(4)
  447. 29: ld r10,264(4)
  448. 30: ld r8,136(4)
  449. 31: ldu r6,8(4)
  450. cmpwi r5,24 /* sets cr0 for the "bge 0b" that ends the outer pass */
  451. 1:
  452. 32: std r22,648(3)
  453. 33: std r21,520(3)
  454. 34: std r20,392(3)
  455. 35: std r11,264(3)
  456. 36: std r9,136(3)
  457. 37: std r7,8(3)
  458. 38: ld r28,648(4)
  459. 39: ld r27,520(4)
  460. 40: ld r26,392(4)
  461. 41: ld r31,264(4)
  462. 42: ld r30,136(4)
  463. 43: ld r29,8(4)
  464. 44: std r25,656(3)
  465. 45: std r24,528(3)
  466. 46: std r23,400(3)
  467. 47: std r10,272(3)
  468. 48: std r8,144(3)
  469. 49: std r6,16(3)
  470. 50: ld r22,656(4)
  471. 51: ld r21,528(4)
  472. 52: ld r20,400(4)
  473. 53: ld r11,272(4)
  474. 54: ld r9,144(4)
  475. 55: ld r7,16(4)
  476. 56: std r28,664(3)
  477. 57: std r27,536(3)
  478. 58: std r26,408(3)
  479. 59: std r31,280(3)
  480. 60: std r30,152(3)
  481. 61: stdu r29,24(3) /* dest advances 24 bytes per inner iteration */
  482. 62: ld r25,664(4)
  483. 63: ld r24,536(4)
  484. 64: ld r23,408(4)
  485. 65: ld r10,280(4)
  486. 66: ld r8,152(4)
  487. 67: ldu r6,24(4) /* source advances 24 bytes per inner iteration */
  488. bdnz 1b
  /* last doubleword of each stream for this pass */
  489. 68: std r22,648(3)
  490. 69: std r21,520(3)
  491. 70: std r20,392(3)
  492. 71: std r11,264(3)
  493. 72: std r9,136(3)
  494. 73: std r7,8(3)
  495. 74: addi r4,r4,640 /* step over the five other streams */
  496. 75: addi r3,r3,648
  497. bge 0b /* cr0 from "cmpwi r5,24": another 768-byte pass fits */
  /* remaining 256 bytes, 32 at a time. r5 is 7 here */
  498. mtctr r5
  499. 76: ld r7,0(4)
  500. 77: ld r8,8(4)
  501. 78: ldu r9,16(4)
  502. 3:
  503. 79: ld r10,8(4)
  504. 80: std r7,8(3)
  505. 81: ld r7,16(4)
  506. 82: std r8,16(3)
  507. 83: ld r8,24(4)
  508. 84: std r9,24(3)
  509. 85: ldu r9,32(4)
  510. 86: stdu r10,32(3)
  511. bdnz 3b
  512. 4:
  513. 87: ld r10,8(4)
  514. 88: std r7,8(3)
  515. 89: std r8,16(3)
  516. 90: std r9,24(3)
  517. 91: std r10,32(3)
  /* restore the saved registers and report success */
  518. 9: ld r20,-120(1)
  519. ld r21,-112(1)
  520. ld r22,-104(1)
  521. ld r23,-96(1)
  522. ld r24,-88(1)
  523. ld r25,-80(1)
  524. ld r26,-72(1)
  525. ld r27,-64(1)
  526. ld r28,-56(1)
  527. ld r29,-48(1)
  528. ld r30,-40(1)
  529. ld r31,-32(1)
  530. li r3,0 /* success: 0 bytes not copied */
  531. blr
  532. /*
  533. * on an exception, reset to the beginning and jump back into the
  534. * standard __copy_tofrom_user
  535. */
  /*
   * Label 100 is the single fixup target for every labelled instruction of
   * the page copy. It restores r20-r31, reloads the original dest and src
   * from their save slots, sets the length back to 4096 and restarts the
   * whole copy at .Ldst_aligned. If the fault happens again there, the
   * per-instruction handlers of the generic path produce the byte count.
   */
  536. 100: ld r20,-120(1)
  537. ld r21,-112(1)
  538. ld r22,-104(1)
  539. ld r23,-96(1)
  540. ld r24,-88(1)
  541. ld r25,-80(1)
  542. ld r26,-72(1)
  543. ld r27,-64(1)
  544. ld r28,-56(1)
  545. ld r29,-48(1)
  546. ld r30,-40(1)
  547. ld r31,-32(1)
  548. ld r3,-24(r1) /* original dest */
  549. ld r4,-16(r1) /* original src */
  550. li r5,4096
  551. b .Ldst_aligned
  /*
   * Exception table for the page copy. Here the "b" references bind to the
   * labels 20 to 91 defined inside .Lcopy_page_4K, the nearest earlier
   * definitions. Labels 74 and 75 sit on addi instructions, which do not
   * access memory.
   */
  552. EX_TABLE(20b,100b)
  553. EX_TABLE(21b,100b)
  554. EX_TABLE(22b,100b)
  555. EX_TABLE(23b,100b)
  556. EX_TABLE(24b,100b)
  557. EX_TABLE(25b,100b)
  558. EX_TABLE(26b,100b)
  559. EX_TABLE(27b,100b)
  560. EX_TABLE(28b,100b)
  561. EX_TABLE(29b,100b)
  562. EX_TABLE(30b,100b)
  563. EX_TABLE(31b,100b)
  564. EX_TABLE(32b,100b)
  565. EX_TABLE(33b,100b)
  566. EX_TABLE(34b,100b)
  567. EX_TABLE(35b,100b)
  568. EX_TABLE(36b,100b)
  569. EX_TABLE(37b,100b)
  570. EX_TABLE(38b,100b)
  571. EX_TABLE(39b,100b)
  572. EX_TABLE(40b,100b)
  573. EX_TABLE(41b,100b)
  574. EX_TABLE(42b,100b)
  575. EX_TABLE(43b,100b)
  576. EX_TABLE(44b,100b)
  577. EX_TABLE(45b,100b)
  578. EX_TABLE(46b,100b)
  579. EX_TABLE(47b,100b)
  580. EX_TABLE(48b,100b)
  581. EX_TABLE(49b,100b)
  582. EX_TABLE(50b,100b)
  583. EX_TABLE(51b,100b)
  584. EX_TABLE(52b,100b)
  585. EX_TABLE(53b,100b)
  586. EX_TABLE(54b,100b)
  587. EX_TABLE(55b,100b)
  588. EX_TABLE(56b,100b)
  589. EX_TABLE(57b,100b)
  590. EX_TABLE(58b,100b)
  591. EX_TABLE(59b,100b)
  592. EX_TABLE(60b,100b)
  593. EX_TABLE(61b,100b)
  594. EX_TABLE(62b,100b)
  595. EX_TABLE(63b,100b)
  596. EX_TABLE(64b,100b)
  597. EX_TABLE(65b,100b)
  598. EX_TABLE(66b,100b)
  599. EX_TABLE(67b,100b)
  600. EX_TABLE(68b,100b)
  601. EX_TABLE(69b,100b)
  602. EX_TABLE(70b,100b)
  603. EX_TABLE(71b,100b)
  604. EX_TABLE(72b,100b)
  605. EX_TABLE(73b,100b)
  606. EX_TABLE(74b,100b)
  607. EX_TABLE(75b,100b)
  608. EX_TABLE(76b,100b)
  609. EX_TABLE(77b,100b)
  610. EX_TABLE(78b,100b)
  611. EX_TABLE(79b,100b)
  612. EX_TABLE(80b,100b)
  613. EX_TABLE(81b,100b)
  614. EX_TABLE(82b,100b)
  615. EX_TABLE(83b,100b)
  616. EX_TABLE(84b,100b)
  617. EX_TABLE(85b,100b)
  618. EX_TABLE(86b,100b)
  619. EX_TABLE(87b,100b)
  620. EX_TABLE(88b,100b)
  621. EX_TABLE(89b,100b)
  622. EX_TABLE(90b,100b)
  623. EX_TABLE(91b,100b)
  624. EXPORT_SYMBOL(__copy_tofrom_user)