copyuser_64.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. /*
  2. * Copyright (C) 2002 Paul Mackerras, IBM Corp.
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. */
  9. #include <asm/processor.h>
  10. #include <asm/ppc_asm.h>
  11. #include <asm/export.h>
  /*
   * Endian-neutral doubleword shift mnemonics.  sLd shifts towards the
   * low-numbered (lower-address) end of a loaded doubleword and sHd towards
   * the high-numbered end, so the merge sequence in .Lsrc_unaligned is
   * written once for both byte orders.
   */
  12. #ifdef __BIG_ENDIAN__
  13. #define sLd sld /* Shift towards low-numbered address. */
  14. #define sHd srd /* Shift towards high-numbered address. */
  15. #else
  16. #define sLd srd /* Shift towards low-numbered address. */
  17. #define sHd sld /* Shift towards high-numbered address. */
  18. #endif
  /*
   * __copy_tofrom_user - top-level entry of the 64-bit user copy routine.
   *
   * As consumed by the code that follows: r3 = destination address,
   * r4 = source address, r5 = byte count.  r3 returns 0 when everything
   * was copied, otherwise the number of bytes left uncopied.
   *
   * On CONFIG_PPC_BOOK3S_64 builds the first instruction is a feature
   * section selected on CPU_FTR_VMX_COPY: either a nop that falls through
   * into __copy_tofrom_user_base, or a branch to __copy_tofrom_user_power7
   * (defined outside this file).
   * NOTE(review): presumably the nop is the variant kept when
   * CPU_FTR_VMX_COPY is clear - confirm against the feature-section macros.
   */
  19. .align 7
  20. _GLOBAL_TOC(__copy_tofrom_user)
  21. #ifdef CONFIG_PPC_BOOK3S_64
  22. BEGIN_FTR_SECTION
  23. nop
  24. FTR_SECTION_ELSE
  25. b __copy_tofrom_user_power7
  26. ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
  27. #endif
  /*
   * __copy_tofrom_user_base - generic copy path, also reached by falling
   * through from __copy_tofrom_user.
   *
   * In:  r3 = destination, r4 = source, r5 = byte count.
   * Out: r3 = 0, or the number of bytes not copied after a fault.
   *
   * The original r3/r4/r5 are parked at -24/-16/-8(r1) so the fault fixup
   * code can work out how far the copy got.  This prologue then dispatches
   * to the whole-page path, the short path (under 16 bytes), the
   * destination-unaligned path or the destination-aligned path.
   */
  28. _GLOBAL(__copy_tofrom_user_base)
  29. /* first check for a whole page copy on a page boundary */
  30. cmpldi cr1,r5,16 /* cr1: length vs 16, consumed by the blt cr1 tests */
  31. cmpdi cr6,r5,4096 /* cr6.eq: length is exactly 4096 */
  32. or r0,r3,r4 /* merge both addresses for a single alignment test */
  33. neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
  34. andi. r0,r0,4095 /* cr0.eq: dest and src are both 4K aligned */
  35. std r3,-24(r1) /* save original dest for the fixup code */
  36. crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = 4K aligned AND length 4096 */
  37. std r4,-16(r1) /* save original src */
  38. std r5,-8(r1) /* save original length */
  39. dcbt 0,r4 /* cache-touch hint on the start of the source */
  40. beq .Lcopy_page_4K /* aligned whole-page copy */
  41. andi. r6,r6,7 /* r6 = bytes to 8-byte dest boundary, cr0.eq if none */
  42. PPC_MTOCRF(0x01,r5) /* low 4 bits of length into cr7 for the tail tests */
  43. blt cr1,.Lshort_copy /* fewer than 16 bytes */
  44. /* Below we want to nop out the bne if we're on a CPU that has the
  45. * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
  46. * cleared.
  47. * At the time of writing the only CPU that has this combination of bits
  48. * set is Power6.
  49. */
  50. BEGIN_FTR_SECTION
  51. nop
  52. FTR_SECTION_ELSE
  53. bne .Ldst_unaligned
  54. ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
  55. CPU_FTR_UNALIGNED_LD_STD)
  /*
   * .Ldst_aligned - main 32-bytes-per-iteration copy loop.
   *
   * Entered by fall-through from the prologue, from .Ldst_unaligned once
   * the destination has been brought to an 8-byte boundary, and from the
   * page-copy fixup.  Expects cr1 = remaining length vs 16 and, when a
   * tail of fewer than 16 bytes is left, cr7 = low 4 bits of the length.
   *
   * r3 is biased by -16 so that a length with an even number of 16-byte
   * chunks can enter the loop in the middle (label 22) with the first
   * 16 bytes already loaded in r7/r6.  A length with an odd number of
   * chunks undoes the bias and starts at label 21, or goes straight to
   * label 72 when there is no full 32-byte block.  Loads for the next
   * half of each block are issued ahead of the stores of the previous
   * half.
   */
  56. .Ldst_aligned:
  57. addi r3,r3,-16 /* bias dest, see above */
  58. BEGIN_FTR_SECTION
  59. andi. r0,r4,7 /* r0 = source misalignment, used by .Lsrc_unaligned */
  60. bne .Lsrc_unaligned
  61. END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
  62. blt cr1,.Ldo_tail /* if < 16 bytes to copy */
  63. srdi r0,r5,5 /* r0 = number of full 32-byte blocks */
  64. cmpdi cr1,r0,0 /* cr1.eq: no full 32-byte block */
  65. 20: ld r7,0(r4)
  66. 220: ld r6,8(r4)
  67. addi r4,r4,16
  68. mtctr r0
  69. andi. r0,r5,0x10 /* is there an odd 16-byte chunk? */
  70. beq 22f /* no: enter mid-loop with r7/r6 preloaded */
  71. addi r3,r3,16 /* yes: undo the dest bias */
  72. addi r4,r4,-16 /* and the source advance */
  73. mr r9,r7
  74. mr r8,r6
  75. beq cr1,72f /* only 16 to 31 bytes: store r9/r8 and finish */
  76. 21: ld r7,16(r4)
  77. 221: ld r6,24(r4)
  78. addi r4,r4,32
  79. 70: std r9,0(r3)
  80. 270: std r8,8(r3)
  81. 22: ld r9,0(r4)
  82. 222: ld r8,8(r4)
  83. 71: std r7,16(r3)
  84. 271: std r6,24(r3)
  85. addi r3,r3,32
  86. bdnz 21b
  87. 72: std r9,0(r3) /* drain the last 16 bytes still held in r9/r8 */
  88. 272: std r8,8(r3)
  89. andi. r5,r5,0xf /* r5 = tail bytes below 16 */
  90. beq+ 3f /* none: return 0 via label 3 in .Ldo_tail */
  91. addi r4,r4,16 /* step src past the 16 bytes just stored */
  /*
   * .Ldo_tail - copy the final 0 to 15 bytes and return 0.
   *
   * In: r4 = source of the tail, r3 = destination of the tail minus 16,
   * cr7 = low 4 bits of the length.  cr7 bit 0 selects an 8-byte move,
   * bit 1 a 4-byte move, bit 2 a 2-byte move and bit 3 a 1-byte move.
   */
  92. .Ldo_tail:
  93. addi r3,r3,16 /* remove the -16 bias / step past the last 16 bytes */
  94. bf cr7*4+0,246f
  95. 244: ld r9,0(r4) /* 8 bytes */
  96. addi r4,r4,8
  97. 245: std r9,0(r3)
  98. addi r3,r3,8
  99. 246: bf cr7*4+1,1f
  100. 23: lwz r9,0(r4) /* 4 bytes */
  101. addi r4,r4,4
  102. 73: stw r9,0(r3)
  103. addi r3,r3,4
  104. 1: bf cr7*4+2,2f
  105. 44: lhz r9,0(r4) /* 2 bytes */
  106. addi r4,r4,2
  107. 74: sth r9,0(r3)
  108. addi r3,r3,2
  109. 2: bf cr7*4+3,3f
  110. 45: lbz r9,0(r4) /* 1 byte */
  111. 75: stb r9,0(r3)
  112. 3: li r3,0 /* success: nothing left uncopied */
  113. blr
  /*
   * .Lsrc_unaligned - copy when the source is not 8-byte aligned.
   *
   * Entered from .Ldst_aligned with r0 = source address & 7 (nonzero),
   * r3 already biased by -16, r5 = length and cr7 = low 4 bits of the
   * length.
   *
   * The source pointer is rounded down to a doubleword boundary and read
   * with aligned ld.  Each output doubleword is assembled from two
   * neighbouring input doublewords: one shifted with sLd by r10 bits, the
   * other with sHd by r11 = 64 - r10 bits, then ORed together.  ctr counts
   * passes of the two-doubleword loop at labels 1/2.  Entry sequence 24
   * serves an even doubleword count and 28 an odd one (length bit 3 set),
   * with cr6 short-circuiting the loop for small counts.  Trailing bytes
   * (length & 7) are written from r9 at label 7.
   */
  114. .Lsrc_unaligned:
  115. srdi r6,r5,3 /* r6 = length in doublewords */
  116. addi r5,r5,-16
  117. subf r4,r0,r4 /* round src down to a doubleword boundary */
  118. srdi r7,r5,4 /* r7 = (length - 16) / 16, the loop count */
  119. sldi r10,r0,3 /* r10 = source misalignment in bits */
  120. cmpldi cr6,r6,3 /* cr6: doubleword count vs 3 */
  121. andi. r5,r5,7 /* r5 = trailing bytes, cr0 stays live until the bne 6f */
  122. mtctr r7
  123. subfic r11,r10,64 /* r11 = complementary shift count */
  124. add r5,r5,r0 /* trailing bytes + misalignment, tested at label 6 */
  125. bt cr7*4+0,28f /* odd number of doublewords */
  126. 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
  127. 25: ld r0,8(r4)
  128. sLd r6,r9,r10
  129. 26: ldu r9,16(r4)
  130. sHd r7,r0,r11
  131. sLd r8,r0,r10
  132. or r7,r7,r6
  133. blt cr6,79f
  134. 27: ld r0,8(r4)
  135. b 2f
  136. 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
  137. 29: ldu r9,8(r4)
  138. sLd r8,r0,r10
  139. addi r3,r3,-8
  140. blt cr6,5f
  141. 30: ld r0,8(r4)
  142. sHd r12,r9,r11
  143. sLd r6,r9,r10
  144. 31: ldu r9,16(r4)
  145. or r12,r8,r12
  146. sHd r7,r0,r11
  147. sLd r8,r0,r10
  148. addi r3,r3,16
  149. beq cr6,78f
  150. 1: or r7,r7,r6
  151. 32: ld r0,8(r4)
  152. 76: std r12,8(r3)
  153. 2: sHd r12,r9,r11
  154. sLd r6,r9,r10
  155. 33: ldu r9,16(r4)
  156. or r12,r8,r12
  157. 77: stdu r7,16(r3)
  158. sHd r7,r0,r11
  159. sLd r8,r0,r10
  160. bdnz 1b
  161. 78: std r12,8(r3)
  162. or r7,r7,r6
  163. 79: std r7,16(r3)
  164. 5: sHd r12,r9,r11
  165. or r12,r8,r12
  166. 80: std r12,24(r3) /* last full doubleword */
  167. bne 6f /* cr0 from the andi. above: trailing bytes remain */
  168. li r3,0
  169. blr
  170. 6: cmpwi cr1,r5,8 /* do the trailing bytes need one more source doubleword? */
  171. addi r3,r3,32 /* r3 = address of the first trailing byte */
  172. sLd r9,r9,r10
  173. ble cr1,7f
  174. 34: ld r0,8(r4)
  175. sHd r7,r0,r11
  176. or r9,r7,r9
  177. 7: /* r9 holds the trailing bytes, cr7 bits 1-3 select 4/2/1 byte stores */
  178. bf cr7*4+1,1f
  179. #ifdef __BIG_ENDIAN__
  180. rotldi r9,r9,32 /* big-endian: rotate the next bytes into the low end before storing */
  181. #endif
  182. 94: stw r9,0(r3)
  183. #ifdef __LITTLE_ENDIAN__
  184. rotrdi r9,r9,32 /* little-endian: discard the bytes just stored */
  185. #endif
  186. addi r3,r3,4
  187. 1: bf cr7*4+2,2f
  188. #ifdef __BIG_ENDIAN__
  189. rotldi r9,r9,16
  190. #endif
  191. 95: sth r9,0(r3)
  192. #ifdef __LITTLE_ENDIAN__
  193. rotrdi r9,r9,16
  194. #endif
  195. addi r3,r3,2
  196. 2: bf cr7*4+3,3f
  197. #ifdef __BIG_ENDIAN__
  198. rotldi r9,r9,8
  199. #endif
  200. 96: stb r9,0(r3)
  201. #ifdef __LITTLE_ENDIAN__
  202. rotrdi r9,r9,8
  203. #endif
  204. 3: li r3,0 /* success */
  205. blr
  /*
   * .Ldst_unaligned - copy the 1 to 7 bytes needed to bring the
   * destination up to an 8-byte boundary, then continue at .Ldst_aligned.
   *
   * In: r6 = bytes to the boundary (nonzero), r5 = total length (at least
   * 16, shorter copies went to .Lshort_copy).  r7 tracks the offset
   * already copied and r3/r4 are only advanced once, at the end.  cr1 and
   * cr7 are recomputed for the remaining length before the jump.
   */
  206. .Ldst_unaligned:
  207. PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
  208. subf r5,r6,r5 /* r5 = length left after the alignment bytes */
  209. li r7,0 /* r7 = offset copied so far */
  210. cmpldi cr1,r5,16 /* refresh cr1 for the blt cr1 in .Ldst_aligned */
  211. bf cr7*4+3,1f
  212. 35: lbz r0,0(r4) /* 1 byte */
  213. 81: stb r0,0(r3)
  214. addi r7,r7,1
  215. 1: bf cr7*4+2,2f
  216. 36: lhzx r0,r7,r4 /* 2 bytes at offset r7 */
  217. 82: sthx r0,r7,r3
  218. addi r7,r7,2
  219. 2: bf cr7*4+1,3f
  220. 37: lwzx r0,r7,r4 /* 4 bytes at offset r7 */
  221. 83: stwx r0,r7,r3
  222. 3: PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of the remaining length */
  223. add r4,r6,r4 /* advance src and dest past the alignment bytes */
  224. add r3,r6,r3
  225. b .Ldst_aligned
  /*
   * .Lshort_copy - copy fewer than 16 bytes and return 0.
   *
   * In: r3 = destination, r4 = source, cr7 = low 4 bits of the length.
   * No alignment is established first: the 8-byte piece is moved as two
   * word accesses, followed by optional 4, 2 and 1 byte pieces.
   */
  226. .Lshort_copy:
  227. bf cr7*4+0,1f
  228. 38: lwz r0,0(r4) /* 8 bytes, as two words */
  229. 39: lwz r9,4(r4)
  230. addi r4,r4,8
  231. 84: stw r0,0(r3)
  232. 85: stw r9,4(r3)
  233. addi r3,r3,8
  234. 1: bf cr7*4+1,2f
  235. 40: lwz r0,0(r4) /* 4 bytes */
  236. addi r4,r4,4
  237. 86: stw r0,0(r3)
  238. addi r3,r3,4
  239. 2: bf cr7*4+2,3f
  240. 41: lhz r0,0(r4) /* 2 bytes */
  241. addi r4,r4,2
  242. 87: sth r0,0(r3)
  243. addi r3,r3,2
  244. 3: bf cr7*4+3,4f
  245. 42: lbz r0,0(r4) /* 1 byte */
  246. 88: stb r0,0(r3)
  247. 4: li r3,0 /* success */
  248. blr
  249. /*
  250. * exception handlers follow
  251. * we have to return the number of bytes not copied
  252. * for an exception on a load, we retry the rest of the copy one byte
  253. * at a time; nothing below zeroes the destination
  254. *
  255. * Label 1NN (or 3NN) is the fixup for the load at label NN (or 2NN), as
  256. * wired up by the EX_TABLE entries after the handlers.  The labels are
  257. * stacked so that falling through the addi instructions adds the right
  258. * correction to r3 for each faulting load.
  259. */
  254. 136: /* indexed loads in .Ldst_unaligned: r7 = offset already copied */
  255. 137:
  256. add r3,r3,r7
  257. b 1f
  258. 130: /* 130, 131: r3 += 24 in total */
  259. 131:
  260. addi r3,r3,8
  261. 120: /* 120 through 133: r3 += 16 in total */
  262. 320:
  263. 122:
  264. 322:
  265. 124:
  266. 125:
  267. 126:
  268. 127:
  269. 128:
  270. 129:
  271. 133:
  272. addi r3,r3,8
  273. 132: /* 132: r3 += 8 */
  274. addi r3,r3,8
  275. 121: /* remaining labels: r3 is used as it stands */
  276. 321:
  277. 344:
  278. 134:
  279. 135:
  280. 138:
  281. 139:
  282. 140:
  283. 141:
  284. 142:
  285. 123:
  286. 144:
  287. 145:
  288. /*
  289. * here we have had a fault on a load and r3 points to the first
  290. * unmodified byte of the destination
  291. */
  292. 1: ld r6,-24(r1) /* r6 = original dest */
  293. ld r4,-16(r1) /* r4 = original src */
  294. ld r5,-8(r1) /* r5 = original length */
  295. subf r6,r6,r3 /* r6 = bytes already copied */
  296. add r4,r4,r6 /* r4 = src address matching r3 */
  297. subf r5,r6,r5 /* #bytes left to go */
  298. /*
  299. * first see if we can copy any more bytes before hitting another exception
  300. */
  301. mtctr r5 /* ctr = bytes still to copy */
  302. 43: lbz r0,0(r4)
  303. addi r4,r4,1
  304. 89: stb r0,0(r3)
  305. addi r3,r3,1
  306. bdnz 43b
  307. li r3,0 /* huh? all copied successfully this time? */
  308. blr
  309. /*
  310. * here we have trapped again, amount remaining is in ctr.
  311. */
  312. 143: mfctr r3 /* return the bytes not copied */
  313. blr
  314. /*
  315. * exception handlers for stores: we just need to work
  316. * out how many bytes weren't copied
  317. *
  318. * Label 1NN (or 3NN) is the fixup for the store at label NN (or 2NN).
  319. * Falling through the stacked addi instructions adds the displacement
  320. * of the faulting store to r3, then the common tail returns
  321. * (original dest + original length) - r3.
  322. */
  318. 182: /* indexed stores in .Ldst_unaligned: r7 = offset already copied */
  319. 183:
  320. add r3,r3,r7
  321. b 1f
  322. 371: /* 371, 180: r3 += 24 in total */
  323. 180:
  324. addi r3,r3,8
  325. 171: /* 171, 177, 179: r3 += 16 in total */
  326. 177:
  327. 179:
  328. addi r3,r3,8
  329. 370: /* 370, 372, 176, 178: r3 += 8 in total */
  330. 372:
  331. 176:
  332. 178:
  333. addi r3,r3,4
  334. 185: /* 185: r3 += 4 */
  335. addi r3,r3,4
  336. 170: /* remaining labels: r3 is used as it stands */
  337. 172:
  338. 345:
  339. 173:
  340. 174:
  341. 175:
  342. 181:
  343. 184:
  344. 186:
  345. 187:
  346. 188:
  347. 189:
  348. 194:
  349. 195:
  350. 196:
  351. 1:
  352. ld r6,-24(r1) /* r6 = original dest */
  353. ld r5,-8(r1) /* r5 = original length */
  354. add r6,r6,r5 /* r6 = one past the end of the destination */
  355. subf r3,r3,r6 /* #bytes not copied */
  356. blr
  /*
   * Exception table for the generic copy paths.  Each EX_TABLE(insn, fixup)
   * entry pairs one user-memory load or store (numeric label NN or 2NN,
   * referenced backwards) with its handler label (1NN or 3NN) in the load
   * and store fixup sections.  Entries 43 and 89 cover the byte-by-byte
   * retry loop inside the load fixup itself.
   * NOTE(review): EX_TABLE is defined outside this file - the exact record
   * format it emits is not visible here.
   */
  357. EX_TABLE(20b,120b)
  358. EX_TABLE(220b,320b)
  359. EX_TABLE(21b,121b)
  360. EX_TABLE(221b,321b)
  361. EX_TABLE(70b,170b)
  362. EX_TABLE(270b,370b)
  363. EX_TABLE(22b,122b)
  364. EX_TABLE(222b,322b)
  365. EX_TABLE(71b,171b)
  366. EX_TABLE(271b,371b)
  367. EX_TABLE(72b,172b)
  368. EX_TABLE(272b,372b)
  369. EX_TABLE(244b,344b)
  370. EX_TABLE(245b,345b)
  371. EX_TABLE(23b,123b)
  372. EX_TABLE(73b,173b)
  373. EX_TABLE(44b,144b)
  374. EX_TABLE(74b,174b)
  375. EX_TABLE(45b,145b)
  376. EX_TABLE(75b,175b)
  377. EX_TABLE(24b,124b)
  378. EX_TABLE(25b,125b)
  379. EX_TABLE(26b,126b)
  380. EX_TABLE(27b,127b)
  381. EX_TABLE(28b,128b)
  382. EX_TABLE(29b,129b)
  383. EX_TABLE(30b,130b)
  384. EX_TABLE(31b,131b)
  385. EX_TABLE(32b,132b)
  386. EX_TABLE(76b,176b)
  387. EX_TABLE(33b,133b)
  388. EX_TABLE(77b,177b)
  389. EX_TABLE(78b,178b)
  390. EX_TABLE(79b,179b)
  391. EX_TABLE(80b,180b)
  392. EX_TABLE(34b,134b)
  393. EX_TABLE(94b,194b)
  394. EX_TABLE(95b,195b)
  395. EX_TABLE(96b,196b)
  396. EX_TABLE(35b,135b)
  397. EX_TABLE(81b,181b)
  398. EX_TABLE(36b,136b)
  399. EX_TABLE(82b,182b)
  400. EX_TABLE(37b,137b)
  401. EX_TABLE(83b,183b)
  402. EX_TABLE(38b,138b)
  403. EX_TABLE(39b,139b)
  404. EX_TABLE(84b,184b)
  405. EX_TABLE(85b,185b)
  406. EX_TABLE(40b,140b)
  407. EX_TABLE(86b,186b)
  408. EX_TABLE(41b,141b)
  409. EX_TABLE(87b,187b)
  410. EX_TABLE(42b,142b)
  411. EX_TABLE(88b,188b)
  412. EX_TABLE(43b,143b)
  413. EX_TABLE(89b,189b)
  414. /*
  415. * Routine to copy a whole page of data, optimized for POWER4.
  416. * On POWER4 it is more than 50% faster than the simple loop
  417. * above (following the .Ldst_aligned label).
  418. */
  /*
   * Reached only when source and destination are both 4K aligned and the
   * length is exactly 4096.  r20-r31 are saved at -32 to -120(r1), just
   * below the r3/r4/r5 slots written by the prologue, and restored before
   * returning 0.
   *
   * The first 3840 bytes are moved in five outer passes of 768 bytes.
   * Each pass works on six streams spaced 128 bytes apart (offsets 0,
   * 128, 256, 384, 512 and 640), with loads running ahead of the matching
   * stores.  The last 256 bytes are moved by the 32-byte loop at label 3
   * and its epilogue at label 4.  r5 counts 32-byte units, starting at
   * 127.
   *
   * Numeric labels 20 to 91 are defined again here.  The EX_TABLE entries
   * that follow this routine use backward references and therefore bind
   * to these definitions, not to the ones in the generic path.
   */
  419. .Lcopy_page_4K:
  420. std r31,-32(1) /* save r20-r31 below the stack pointer */
  421. std r30,-40(1)
  422. std r29,-48(1)
  423. std r28,-56(1)
  424. std r27,-64(1)
  425. std r26,-72(1)
  426. std r25,-80(1)
  427. std r24,-88(1)
  428. std r23,-96(1)
  429. std r22,-104(1)
  430. std r21,-112(1)
  431. std r20,-120(1)
  432. li r5,4096/32 - 1 /* r5 = 127: 32-byte units in a page, minus one */
  433. addi r3,r3,-8 /* bias dest so the store displacements start at 8 */
  434. li r0,5 /* inner loop count */
  435. 0: addi r5,r5,-24 /* one outer pass covers 24 units (768 bytes) */
  436. mtctr r0
  437. 20: ld r22,640(4) /* prime the pipeline: first doubleword of each stream */
  438. 21: ld r21,512(4)
  439. 22: ld r20,384(4)
  440. 23: ld r11,256(4)
  441. 24: ld r9,128(4)
  442. 25: ld r7,0(4)
  443. 26: ld r25,648(4) /* second doubleword of each stream */
  444. 27: ld r24,520(4)
  445. 28: ld r23,392(4)
  446. 29: ld r10,264(4)
  447. 30: ld r8,136(4)
  448. 31: ldu r6,8(4)
  449. cmpwi r5,24 /* cr0 is consumed by the bge 0b after the inner loop */
  450. 1: /* inner loop: three doublewords per stream per pass */
  451. 32: std r22,648(3)
  452. 33: std r21,520(3)
  453. 34: std r20,392(3)
  454. 35: std r11,264(3)
  455. 36: std r9,136(3)
  456. 37: std r7,8(3)
  457. 38: ld r28,648(4)
  458. 39: ld r27,520(4)
  459. 40: ld r26,392(4)
  460. 41: ld r31,264(4)
  461. 42: ld r30,136(4)
  462. 43: ld r29,8(4)
  463. 44: std r25,656(3)
  464. 45: std r24,528(3)
  465. 46: std r23,400(3)
  466. 47: std r10,272(3)
  467. 48: std r8,144(3)
  468. 49: std r6,16(3)
  469. 50: ld r22,656(4)
  470. 51: ld r21,528(4)
  471. 52: ld r20,400(4)
  472. 53: ld r11,272(4)
  473. 54: ld r9,144(4)
  474. 55: ld r7,16(4)
  475. 56: std r28,664(3)
  476. 57: std r27,536(3)
  477. 58: std r26,408(3)
  478. 59: std r31,280(3)
  479. 60: std r30,152(3)
  480. 61: stdu r29,24(3)
  481. 62: ld r25,664(4)
  482. 63: ld r24,536(4)
  483. 64: ld r23,408(4)
  484. 65: ld r10,280(4)
  485. 66: ld r8,152(4)
  486. 67: ldu r6,24(4)
  487. bdnz 1b
  488. 68: std r22,648(3) /* drain: last doubleword of each stream */
  489. 69: std r21,520(3)
  490. 70: std r20,392(3)
  491. 71: std r11,264(3)
  492. 72: std r9,136(3)
  493. 73: std r7,8(3)
  494. 74: addi r4,r4,640 /* skip the five streams already copied */
  495. 75: addi r3,r3,648
  496. bge 0b /* another 768-byte pass while r5 was at least 24 */
  497. mtctr r5 /* remaining 32-byte units, less the one finished at label 4 */
  498. 76: ld r7,0(4)
  499. 77: ld r8,8(4)
  500. 78: ldu r9,16(4)
  501. 3: /* 32 bytes per pass */
  502. 79: ld r10,8(4)
  503. 80: std r7,8(3)
  504. 81: ld r7,16(4)
  505. 82: std r8,16(3)
  506. 83: ld r8,24(4)
  507. 84: std r9,24(3)
  508. 85: ldu r9,32(4)
  509. 86: stdu r10,32(3)
  510. bdnz 3b
  511. 4: /* final 32 bytes */
  512. 87: ld r10,8(4)
  513. 88: std r7,8(3)
  514. 89: std r8,16(3)
  515. 90: std r9,24(3)
  516. 91: std r10,32(3)
  517. 9: ld r20,-120(1) /* restore r20-r31 */
  518. ld r21,-112(1)
  519. ld r22,-104(1)
  520. ld r23,-96(1)
  521. ld r24,-88(1)
  522. ld r25,-80(1)
  523. ld r26,-72(1)
  524. ld r27,-64(1)
  525. ld r28,-56(1)
  526. ld r29,-48(1)
  527. ld r30,-40(1)
  528. ld r31,-32(1)
  529. li r3,0 /* success */
  530. blr
  531. /*
  532. * on an exception, reset to the beginning and jump back into the
  533. * standard __copy_tofrom_user
  534. *
  535. * The page routine does not track progress itself.  Its fixup restores
  536. * r20-r31 and the original r3/r4 from their save slots, reloads the
  537. * length and redoes the whole copy with the generic loop, whose own
  538. * fixups then compute the number of bytes not copied.
  539. */
  535. 100: ld r20,-120(1)
  536. ld r21,-112(1)
  537. ld r22,-104(1)
  538. ld r23,-96(1)
  539. ld r24,-88(1)
  540. ld r25,-80(1)
  541. ld r26,-72(1)
  542. ld r27,-64(1)
  543. ld r28,-56(1)
  544. ld r29,-48(1)
  545. ld r30,-40(1)
  546. ld r31,-32(1)
  547. ld r3,-24(r1) /* original dest */
  548. ld r4,-16(r1) /* original src */
  549. li r5,4096 /* the page path is only taken for length 4096 */
  550. b .Ldst_aligned /* cr1 still holds the entry comparison of length vs 16 */
  /*
   * Exception table for .Lcopy_page_4K.  Every labelled instruction of the
   * page routine (labels 20 to 91, referenced backwards so they bind to
   * the page-copy definitions) shares the single fixup at label 100.
   * NOTE(review): EX_TABLE is defined outside this file - the exact record
   * format it emits is not visible here.
   */
  551. EX_TABLE(20b,100b)
  552. EX_TABLE(21b,100b)
  553. EX_TABLE(22b,100b)
  554. EX_TABLE(23b,100b)
  555. EX_TABLE(24b,100b)
  556. EX_TABLE(25b,100b)
  557. EX_TABLE(26b,100b)
  558. EX_TABLE(27b,100b)
  559. EX_TABLE(28b,100b)
  560. EX_TABLE(29b,100b)
  561. EX_TABLE(30b,100b)
  562. EX_TABLE(31b,100b)
  563. EX_TABLE(32b,100b)
  564. EX_TABLE(33b,100b)
  565. EX_TABLE(34b,100b)
  566. EX_TABLE(35b,100b)
  567. EX_TABLE(36b,100b)
  568. EX_TABLE(37b,100b)
  569. EX_TABLE(38b,100b)
  570. EX_TABLE(39b,100b)
  571. EX_TABLE(40b,100b)
  572. EX_TABLE(41b,100b)
  573. EX_TABLE(42b,100b)
  574. EX_TABLE(43b,100b)
  575. EX_TABLE(44b,100b)
  576. EX_TABLE(45b,100b)
  577. EX_TABLE(46b,100b)
  578. EX_TABLE(47b,100b)
  579. EX_TABLE(48b,100b)
  580. EX_TABLE(49b,100b)
  581. EX_TABLE(50b,100b)
  582. EX_TABLE(51b,100b)
  583. EX_TABLE(52b,100b)
  584. EX_TABLE(53b,100b)
  585. EX_TABLE(54b,100b)
  586. EX_TABLE(55b,100b)
  587. EX_TABLE(56b,100b)
  588. EX_TABLE(57b,100b)
  589. EX_TABLE(58b,100b)
  590. EX_TABLE(59b,100b)
  591. EX_TABLE(60b,100b)
  592. EX_TABLE(61b,100b)
  593. EX_TABLE(62b,100b)
  594. EX_TABLE(63b,100b)
  595. EX_TABLE(64b,100b)
  596. EX_TABLE(65b,100b)
  597. EX_TABLE(66b,100b)
  598. EX_TABLE(67b,100b)
  599. EX_TABLE(68b,100b)
  600. EX_TABLE(69b,100b)
  601. EX_TABLE(70b,100b)
  602. EX_TABLE(71b,100b)
  603. EX_TABLE(72b,100b)
  604. EX_TABLE(73b,100b)
  605. EX_TABLE(74b,100b)
  606. EX_TABLE(75b,100b)
  607. EX_TABLE(76b,100b)
  608. EX_TABLE(77b,100b)
  609. EX_TABLE(78b,100b)
  610. EX_TABLE(79b,100b)
  611. EX_TABLE(80b,100b)
  612. EX_TABLE(81b,100b)
  613. EX_TABLE(82b,100b)
  614. EX_TABLE(83b,100b)
  615. EX_TABLE(84b,100b)
  616. EX_TABLE(85b,100b)
  617. EX_TABLE(86b,100b)
  618. EX_TABLE(87b,100b)
  619. EX_TABLE(88b,100b)
  620. EX_TABLE(89b,100b)
  621. EX_TABLE(90b,100b)
  622. EX_TABLE(91b,100b)
  623. EXPORT_SYMBOL(__copy_tofrom_user)