123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644 |
- /*
- * Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
- #include <asm/processor.h>
- #include <asm/ppc_asm.h>
- #include <asm/export.h>
- #include <asm/asm-compat.h>
- #ifdef __BIG_ENDIAN__
- #define sLd sld /* Shift towards low-numbered address (big-endian: shift left). */
- #define sHd srd /* Shift towards high-numbered address (big-endian: shift right). */
- #else
- #define sLd srd /* Shift towards low-numbered address (little-endian: shift right). */
- #define sHd sld /* Shift towards high-numbered address (little-endian: shift left). */
- #endif
- .align 7 /* alignment for the entry point that follows */
- _GLOBAL_TOC(__copy_tofrom_user) /* entry: r3 = dest, r4 = src, r5 = byte count; r3 returns the number of bytes NOT copied */
- #ifdef CONFIG_PPC_BOOK3S_64
- BEGIN_FTR_SECTION
- nop /* first variant: fall through into __copy_tofrom_user_base */
- FTR_SECTION_ELSE
- b __copy_tofrom_user_power7 /* alternative variant: hand off to the POWER7 routine (not defined in this file) */
- ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY) /* presumably keeps the nop when CPU_FTR_VMX_COPY is clear -- confirm against the feature-fixup macros */
- #endif
- _GLOBAL(__copy_tofrom_user_base) /* generic copy: r3 = dest, r4 = src, r5 = length in bytes */
- /* first check for a whole page copy on a page boundary */
- cmpldi cr1,r5,16 /* cr1: length vs 16 (unsigned), tested by the blt to .Lshort_copy */
- cmpdi cr6,r5,4096 /* cr6.eq set when length is exactly 4096 */
- or r0,r3,r4 /* combine dest and src so one mask tests both alignments */
- neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
- andi. r0,r0,4095 /* cr0.eq set when dest and src are both 4096-byte aligned */
- std r3,-24(r1) /* save original dest below the stack pointer for the fault handlers */
- crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq = (both aligned) AND (length == 4096) */
- std r4,-16(r1) /* save original src */
- std r5,-8(r1) /* save original length */
- dcbt 0,r4 /* cache-touch hint for the start of the source */
- beq .Lcopy_page_4K /* aligned whole-page copy uses the unrolled page routine */
- andi. r6,r6,7 /* r6 = bytes needed to 8-byte-align dest; cr0.eq if already aligned */
- PPC_MTOCRF(0x01,r5) /* low 4 bits of length into cr7; they select the 8/4/2/1-byte tail moves */
- blt cr1,.Lshort_copy /* fewer than 16 bytes to copy */
- /* Below we want to nop out the bne if we're on a CPU that has the
- * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
- * cleared.
- * At the time of writing the only CPU that has this combination of bits
- * set is Power6.
- */
- BEGIN_FTR_SECTION
- nop /* variant without the dest-alignment branch: fall into .Ldst_aligned */
- FTR_SECTION_ELSE
- bne .Ldst_unaligned /* cr0 from the andi. above: dest is not 8-byte aligned */
- ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
- CPU_FTR_UNALIGNED_LD_STD)
- .Ldst_aligned: /* main copy path; reached by fall-through, from .Ldst_unaligned, and from the page-copy fault handler */
- addi r3,r3,-16 /* bias dest by -16; the fault fixups add the matching offset back */
- BEGIN_FTR_SECTION
- andi. r0,r4,7 /* r0 = src misalignment within a doubleword */
- bne .Lsrc_unaligned /* misaligned source: use the shift-and-merge copy */
- END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
- blt cr1,.Ldo_tail /* if < 16 bytes to copy */
- srdi r0,r5,5 /* r0 = number of 32-byte chunks */
- cmpdi cr1,r0,0 /* cr1.eq if there are no 32-byte chunks */
- 20: ld r7,0(r4) /* preload the first 16 bytes into r7/r6 */
- 220: ld r6,8(r4)
- addi r4,r4,16
- mtctr r0 /* ctr = 32-byte loop count */
- andi. r0,r5,0x10 /* is there an odd 16-byte chunk? */
- beq 22f /* no: enter the loop at its midpoint with r7/r6 pending */
- addi r3,r3,16 /* yes: realign pointers so r9/r8 carry the odd chunk */
- addi r4,r4,-16
- mr r9,r7
- mr r8,r6
- beq cr1,72f /* no 32-byte chunks at all: just store these 16 bytes */
- 21: ld r7,16(r4) /* loop: 32 bytes per iteration, loads interleaved with stores */
- 221: ld r6,24(r4)
- addi r4,r4,32
- 70: std r9,0(r3)
- 270: std r8,8(r3)
- 22: ld r9,0(r4)
- 222: ld r8,8(r4)
- 71: std r7,16(r3)
- 271: std r6,24(r3)
- addi r3,r3,32
- bdnz 21b
- 72: std r9,0(r3) /* store the final 16 bytes still held in r9/r8 */
- 272: std r8,8(r3)
- andi. r5,r5,0xf /* any bytes left over (fewer than 16)? */
- beq+ 3f /* none: return success */
- addi r4,r4,16 /* step src past the 16 bytes just stored */
- .Ldo_tail: /* copy the remaining bytes as 8/4/2/1-byte pieces chosen by cr7 */
- addi r3,r3,16 /* remove the -16 bias from dest */
- bf cr7*4+0,246f /* skip unless the 8-byte bit of the length is set */
- 244: ld r9,0(r4)
- addi r4,r4,8
- 245: std r9,0(r3)
- addi r3,r3,8
- 246: bf cr7*4+1,1f /* skip unless the 4-byte bit is set */
- 23: lwz r9,0(r4)
- addi r4,r4,4
- 73: stw r9,0(r3)
- addi r3,r3,4
- 1: bf cr7*4+2,2f /* skip unless the 2-byte bit is set */
- 44: lhz r9,0(r4)
- addi r4,r4,2
- 74: sth r9,0(r3)
- addi r3,r3,2
- 2: bf cr7*4+3,3f /* skip unless the 1-byte bit is set */
- 45: lbz r9,0(r4)
- 75: stb r9,0(r3)
- 3: li r3,0 /* everything copied: return 0 bytes remaining */
- blr
- .Lsrc_unaligned: /* src not 8-byte aligned: load aligned doublewords and merge neighbours with shifts; r0 = src & 7 on entry */
- srdi r6,r5,3 /* r6 = length in doublewords */
- addi r5,r5,-16
- subf r4,r0,r4 /* round src down to an 8-byte boundary */
- srdi r7,r5,4 /* r7 = (length - 16) / 16, used as the loop count */
- sldi r10,r0,3 /* r10 = src misalignment in bits */
- cmpldi cr6,r6,3 /* cr6: doubleword count vs 3, selects the early exits below */
- andi. r5,r5,7 /* r5 = trailing bytes past the whole doublewords; cr0 is tested by the bne after 80: */
- mtctr r7
- subfic r11,r10,64 /* r11 = 64 - r10, the complementary shift count */
- add r5,r5,r0 /* r5 = trailing bytes + src misalignment, compared with 8 at 6: */
- bt cr7*4+0,28f /* 8-byte bit of the length set: use the 28: entry sequence */
- 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
- 25: ld r0,8(r4)
- sLd r6,r9,r10
- 26: ldu r9,16(r4)
- sHd r7,r0,r11
- sLd r8,r0,r10
- or r7,r7,r6 /* r7 = first merged destination doubleword */
- blt cr6,79f /* fewer than 3 doublewords: go straight to the final stores */
- 27: ld r0,8(r4)
- b 2f /* join the loop at its midpoint */
- 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
- 29: ldu r9,8(r4)
- sLd r8,r0,r10
- addi r3,r3,-8 /* extra -8 bias so the shared stores below land correctly */
- blt cr6,5f /* fewer than 3 doublewords: only the store at 80: remains */
- 30: ld r0,8(r4)
- sHd r12,r9,r11
- sLd r6,r9,r10
- 31: ldu r9,16(r4)
- or r12,r8,r12
- sHd r7,r0,r11
- sLd r8,r0,r10
- addi r3,r3,16
- beq cr6,78f /* exactly 3 doublewords: skip the loop */
- 1: or r7,r7,r6 /* loop: two merged doublewords stored per iteration */
- 32: ld r0,8(r4)
- 76: std r12,8(r3)
- 2: sHd r12,r9,r11
- sLd r6,r9,r10
- 33: ldu r9,16(r4)
- or r12,r8,r12
- 77: stdu r7,16(r3)
- sHd r7,r0,r11
- sLd r8,r0,r10
- bdnz 1b
- 78: std r12,8(r3) /* drain the doublewords still pending in registers */
- or r7,r7,r6
- 79: std r7,16(r3)
- 5: sHd r12,r9,r11
- or r12,r8,r12
- 80: std r12,24(r3)
- bne 6f /* cr0 from the andi. above: trailing bytes remain */
- li r3,0 /* no trailing bytes: return 0 bytes remaining */
- blr
- 6: cmpwi cr1,r5,8 /* do the trailing bytes extend into another source doubleword? */
- addi r3,r3,32 /* point dest just past the last doubleword stored (24(r3) above) */
- sLd r9,r9,r10 /* move the unused bytes of r9 into position */
- ble cr1,7f /* r9 already holds every remaining byte */
- 34: ld r0,8(r4) /* fetch one more source doubleword and merge it in */
- sHd r7,r0,r11
- or r9,r7,r9
- 7:
- bf cr7*4+1,1f /* skip unless the 4-byte bit of the length is set */
- #ifdef __BIG_ENDIAN__
- rotldi r9,r9,32 /* big-endian: rotate the next 4 bytes into the low word before storing */
- #endif
- 94: stw r9,0(r3)
- #ifdef __LITTLE_ENDIAN__
- rotrdi r9,r9,32 /* little-endian: after storing, rotate the following bytes down */
- #endif
- addi r3,r3,4
- 1: bf cr7*4+2,2f /* skip unless the 2-byte bit is set */
- #ifdef __BIG_ENDIAN__
- rotldi r9,r9,16
- #endif
- 95: sth r9,0(r3)
- #ifdef __LITTLE_ENDIAN__
- rotrdi r9,r9,16
- #endif
- addi r3,r3,2
- 2: bf cr7*4+3,3f /* skip unless the 1-byte bit is set */
- #ifdef __BIG_ENDIAN__
- rotldi r9,r9,8
- #endif
- 96: stb r9,0(r3)
- #ifdef __LITTLE_ENDIAN__
- rotrdi r9,r9,8
- #endif
- 3: li r3,0 /* everything copied: return 0 bytes remaining */
- blr
- .Ldst_unaligned: /* copy 1-7 bytes so that dest becomes 8-byte aligned; r6 = that byte count */
- PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
- subf r5,r6,r5 /* length remaining after the alignment bytes */
- li r7,0 /* r7 = offset copied so far; the 136/137 and 182/183 fixups add it to r3 */
- cmpldi cr1,r5,16 /* refresh cr1 for the "< 16 bytes" test in .Ldst_aligned */
- bf cr7*4+3,1f /* skip unless the 1-byte bit is set */
- 35: lbz r0,0(r4)
- 81: stb r0,0(r3)
- addi r7,r7,1
- 1: bf cr7*4+2,2f /* skip unless the 2-byte bit is set */
- 36: lhzx r0,r7,r4
- 82: sthx r0,r7,r3
- addi r7,r7,2
- 2: bf cr7*4+1,3f /* skip unless the 4-byte bit is set */
- 37: lwzx r0,r7,r4
- 83: stwx r0,r7,r3
- 3: PPC_MTOCRF(0x01,r5) /* cr7 = low 4 bits of the remaining length, for the tail code */
- add r4,r6,r4 /* advance src past the alignment bytes */
- add r3,r6,r3 /* advance dest; it is now 8-byte aligned */
- b .Ldst_aligned
- .Lshort_copy: /* length < 16: copy 8/4/2/1-byte pieces chosen by the length bits in cr7 */
- bf cr7*4+0,1f /* skip unless the 8-byte bit is set */
- 38: lwz r0,0(r4) /* the 8 bytes are moved as two words */
- 39: lwz r9,4(r4)
- addi r4,r4,8
- 84: stw r0,0(r3)
- 85: stw r9,4(r3)
- addi r3,r3,8
- 1: bf cr7*4+1,2f /* skip unless the 4-byte bit is set */
- 40: lwz r0,0(r4)
- addi r4,r4,4
- 86: stw r0,0(r3)
- addi r3,r3,4
- 2: bf cr7*4+2,3f /* skip unless the 2-byte bit is set */
- 41: lhz r0,0(r4)
- addi r4,r4,2
- 87: sth r0,0(r3)
- addi r3,r3,2
- 3: bf cr7*4+3,4f /* skip unless the 1-byte bit is set */
- 42: lbz r0,0(r4)
- 88: stb r0,0(r3)
- 4: li r3,0 /* everything copied: return 0 bytes remaining */
- blr
- /*
- * exception handlers follow
- * we have to return the number of bytes not copied
- * NOTE(review): this comment used to say that a load fault zeroes the rest
- * of the destination, but no instruction in this section stores zeros; the
- * load path only retries byte-by-byte and returns the count still uncopied.
- * Each fixup label is the faulting access label plus 100; the addi chains
- * below add the distance from r3 to the first unmodified destination byte.
- */
- 136:
- 137:
- add r3,r3,r7 /* .Ldst_unaligned loads: r7 bytes were already copied */
- b 1f
- 130: /* loads whose dest offset is +24: falls through all three addi below */
- 131:
- addi r3,r3,8
- 120: /* loads whose dest offset is +16 */
- 320:
- 122:
- 322:
- 124:
- 125:
- 126:
- 127:
- 128:
- 129:
- 133:
- addi r3,r3,8
- 132: /* load whose dest offset is +8 */
- addi r3,r3,8
- 121: /* loads for which r3 already points at the first unmodified byte */
- 321:
- 344:
- 134:
- 135:
- 138:
- 139:
- 140:
- 141:
- 142:
- 123:
- 144:
- 145:
- /*
- * here we have had a fault on a load and r3 points to the first
- * unmodified byte of the destination
- */
- 1: ld r6,-24(r1) /* original dest */
- ld r4,-16(r1) /* original src */
- ld r5,-8(r1) /* original length */
- subf r6,r6,r3 /* r6 = bytes copied so far */
- add r4,r4,r6 /* r4 = src address of the first uncopied byte */
- subf r5,r6,r5 /* #bytes left to go */
- /*
- * first see if we can copy any more bytes before hitting another exception
- */
- mtctr r5
- 43: lbz r0,0(r4) /* byte-at-a-time retry; a fault here goes to 143: */
- addi r4,r4,1
- 89: stb r0,0(r3) /* a fault here goes to 189: in the store fixups */
- addi r3,r3,1
- bdnz 43b
- li r3,0 /* huh? all copied successfully this time? */
- blr
- /*
- * here we have trapped again, amount remaining is in ctr.
- */
- 143: mfctr r3 /* return the number of bytes still uncopied */
- blr
- /*
- * exception handlers for stores: we just need to work
- * out how many bytes weren't copied
- */
- 182:
- 183:
- add r3,r3,r7 /* .Ldst_unaligned stores: r7 bytes were already copied */
- b 1f
- 371: /* stores whose dest offset is +24: 8 + 8 + 4 + 4 */
- 180:
- addi r3,r3,8
- 171: /* stores whose dest offset is +16: 8 + 4 + 4 */
- 177:
- 179:
- addi r3,r3,8
- 370: /* stores whose dest offset is +8: 4 + 4 */
- 372:
- 176:
- 178:
- addi r3,r3,4
- 185: /* store whose dest offset is +4 */
- addi r3,r3,4
- 170: /* stores for which r3 already points at the first unmodified byte */
- 172:
- 345:
- 173:
- 174:
- 175:
- 181:
- 184:
- 186:
- 187:
- 188:
- 189:
- 194:
- 195:
- 196:
- 1:
- ld r6,-24(r1) /* original dest */
- ld r5,-8(r1) /* original length */
- add r6,r6,r5 /* r6 = one past the end of the destination */
- subf r3,r3,r6 /* #bytes not copied */
- blr
- EX_TABLE(20b,120b) /* each entry pairs an access label above with its fixup label, which is the access label + 100 */
- EX_TABLE(220b,320b)
- EX_TABLE(21b,121b)
- EX_TABLE(221b,321b)
- EX_TABLE(70b,170b)
- EX_TABLE(270b,370b)
- EX_TABLE(22b,122b)
- EX_TABLE(222b,322b)
- EX_TABLE(71b,171b)
- EX_TABLE(271b,371b)
- EX_TABLE(72b,172b)
- EX_TABLE(272b,372b)
- EX_TABLE(244b,344b)
- EX_TABLE(245b,345b)
- EX_TABLE(23b,123b)
- EX_TABLE(73b,173b)
- EX_TABLE(44b,144b)
- EX_TABLE(74b,174b)
- EX_TABLE(45b,145b)
- EX_TABLE(75b,175b)
- EX_TABLE(24b,124b) /* .Lsrc_unaligned accesses start here */
- EX_TABLE(25b,125b)
- EX_TABLE(26b,126b)
- EX_TABLE(27b,127b)
- EX_TABLE(28b,128b)
- EX_TABLE(29b,129b)
- EX_TABLE(30b,130b)
- EX_TABLE(31b,131b)
- EX_TABLE(32b,132b)
- EX_TABLE(76b,176b)
- EX_TABLE(33b,133b)
- EX_TABLE(77b,177b)
- EX_TABLE(78b,178b)
- EX_TABLE(79b,179b)
- EX_TABLE(80b,180b)
- EX_TABLE(34b,134b)
- EX_TABLE(94b,194b)
- EX_TABLE(95b,195b)
- EX_TABLE(96b,196b)
- EX_TABLE(35b,135b) /* .Ldst_unaligned accesses start here */
- EX_TABLE(81b,181b)
- EX_TABLE(36b,136b)
- EX_TABLE(82b,182b)
- EX_TABLE(37b,137b)
- EX_TABLE(83b,183b)
- EX_TABLE(38b,138b) /* .Lshort_copy accesses start here */
- EX_TABLE(39b,139b)
- EX_TABLE(84b,184b)
- EX_TABLE(85b,185b)
- EX_TABLE(40b,140b)
- EX_TABLE(86b,186b)
- EX_TABLE(41b,141b)
- EX_TABLE(87b,187b)
- EX_TABLE(42b,142b)
- EX_TABLE(88b,188b)
- EX_TABLE(43b,143b) /* byte-at-a-time retry loop in the load fixup path */
- EX_TABLE(89b,189b)
- /*
- * Routine to copy a whole page of data, optimized for POWER4.
- * On POWER4 it is more than 50% faster than the simple loop
- * above (following the .Ldst_aligned label).
- * Reached only when dest and src are both 4096-byte aligned and the
- * length is exactly 4096. The main loop works on six streams spaced
- * 128 bytes apart (offsets 0, 128, 256, 384, 512 and 640).
- */
- .Lcopy_page_4K:
- std r31,-32(1) /* save r20-r31 below the stack pointer; reloaded at 9: and in the fault handler 100: */
- std r30,-40(1)
- std r29,-48(1)
- std r28,-56(1)
- std r27,-64(1)
- std r26,-72(1)
- std r25,-80(1)
- std r24,-88(1)
- std r23,-96(1)
- std r22,-104(1)
- std r21,-112(1)
- std r20,-120(1)
- li r5,4096/32 - 1 /* r5 = 127: number of 32-byte units in the page, less one */
- addi r3,r3,-8 /* bias dest by -8 to match the store offsets below */
- li r0,5 /* inner-loop trip count */
- 0: addi r5,r5,-24 /* each outer pass copies 768 bytes = 24 units */
- mtctr r0
- 20: ld r22,640(4) /* prime the pipeline: first two doublewords of each stream */
- 21: ld r21,512(4)
- 22: ld r20,384(4)
- 23: ld r11,256(4)
- 24: ld r9,128(4)
- 25: ld r7,0(4)
- 26: ld r25,648(4)
- 27: ld r24,520(4)
- 28: ld r23,392(4)
- 29: ld r10,264(4)
- 30: ld r8,136(4)
- 31: ldu r6,8(4)
- cmpwi r5,24 /* cr0 is consumed by the bge 0b after the inner loop */
- 1:
- 32: std r22,648(3) /* inner loop: 24 bytes per stream per iteration, loads run ahead of stores */
- 33: std r21,520(3)
- 34: std r20,392(3)
- 35: std r11,264(3)
- 36: std r9,136(3)
- 37: std r7,8(3)
- 38: ld r28,648(4)
- 39: ld r27,520(4)
- 40: ld r26,392(4)
- 41: ld r31,264(4)
- 42: ld r30,136(4)
- 43: ld r29,8(4)
- 44: std r25,656(3)
- 45: std r24,528(3)
- 46: std r23,400(3)
- 47: std r10,272(3)
- 48: std r8,144(3)
- 49: std r6,16(3)
- 50: ld r22,656(4)
- 51: ld r21,528(4)
- 52: ld r20,400(4)
- 53: ld r11,272(4)
- 54: ld r9,144(4)
- 55: ld r7,16(4)
- 56: std r28,664(3)
- 57: std r27,536(3)
- 58: std r26,408(3)
- 59: std r31,280(3)
- 60: std r30,152(3)
- 61: stdu r29,24(3)
- 62: ld r25,664(4)
- 63: ld r24,536(4)
- 64: ld r23,408(4)
- 65: ld r10,280(4)
- 66: ld r8,152(4)
- 67: ldu r6,24(4)
- bdnz 1b
- 68: std r22,648(3) /* drain: store the last doubleword loaded for each stream */
- 69: std r21,520(3)
- 70: std r20,392(3)
- 71: std r11,264(3)
- 72: std r9,136(3)
- 73: std r7,8(3)
- 74: addi r4,r4,640 /* skip the five later streams already copied; this label is in the table although addi does not access memory */
- 75: addi r3,r3,648
- bge 0b /* repeat while r5 >= 24 (cr0 from the cmpwi above) */
- mtctr r5 /* r5 = 7 here given the constants above */
- 76: ld r7,0(4) /* remaining 256 bytes: simple 32-bytes-per-iteration loop */
- 77: ld r8,8(4)
- 78: ldu r9,16(4)
- 3:
- 79: ld r10,8(4)
- 80: std r7,8(3)
- 81: ld r7,16(4)
- 82: std r8,16(3)
- 83: ld r8,24(4)
- 84: std r9,24(3)
- 85: ldu r9,32(4)
- 86: stdu r10,32(3)
- bdnz 3b
- 4:
- 87: ld r10,8(4) /* final 32 bytes */
- 88: std r7,8(3)
- 89: std r8,16(3)
- 90: std r9,24(3)
- 91: std r10,32(3)
- 9: ld r20,-120(1) /* restore the saved registers */
- ld r21,-112(1)
- ld r22,-104(1)
- ld r23,-96(1)
- ld r24,-88(1)
- ld r25,-80(1)
- ld r26,-72(1)
- ld r27,-64(1)
- ld r28,-56(1)
- ld r29,-48(1)
- ld r30,-40(1)
- ld r31,-32(1)
- li r3,0 /* whole page copied: return 0 bytes remaining */
- blr
- /*
- * on an exception, reset to the beginning and jump back into the
- * standard __copy_tofrom_user
- * The page copy keeps no running count, so the whole 4096-byte copy is
- * redone by the generic loop, whose fixups compute the exact residue.
- */
- 100: ld r20,-120(1) /* restore the registers saved on entry to .Lcopy_page_4K */
- ld r21,-112(1)
- ld r22,-104(1)
- ld r23,-96(1)
- ld r24,-88(1)
- ld r25,-80(1)
- ld r26,-72(1)
- ld r27,-64(1)
- ld r28,-56(1)
- ld r29,-48(1)
- ld r30,-40(1)
- ld r31,-32(1)
- ld r3,-24(r1) /* original dest saved at function entry */
- ld r4,-16(r1) /* original src saved at function entry */
- li r5,4096 /* the page path is only taken for a length of exactly 4096 */
- b .Ldst_aligned /* cr1 still holds the entry-time length-vs-16 compare; nothing in the page path writes cr1 */
- EX_TABLE(20b,100b) /* numeric labels are reused: Nb is the nearest earlier definition, i.e. the page-copy labels; all share fixup 100: */
- EX_TABLE(21b,100b)
- EX_TABLE(22b,100b)
- EX_TABLE(23b,100b)
- EX_TABLE(24b,100b)
- EX_TABLE(25b,100b)
- EX_TABLE(26b,100b)
- EX_TABLE(27b,100b)
- EX_TABLE(28b,100b)
- EX_TABLE(29b,100b)
- EX_TABLE(30b,100b)
- EX_TABLE(31b,100b)
- EX_TABLE(32b,100b)
- EX_TABLE(33b,100b)
- EX_TABLE(34b,100b)
- EX_TABLE(35b,100b)
- EX_TABLE(36b,100b)
- EX_TABLE(37b,100b)
- EX_TABLE(38b,100b)
- EX_TABLE(39b,100b)
- EX_TABLE(40b,100b)
- EX_TABLE(41b,100b)
- EX_TABLE(42b,100b)
- EX_TABLE(43b,100b)
- EX_TABLE(44b,100b)
- EX_TABLE(45b,100b)
- EX_TABLE(46b,100b)
- EX_TABLE(47b,100b)
- EX_TABLE(48b,100b)
- EX_TABLE(49b,100b)
- EX_TABLE(50b,100b)
- EX_TABLE(51b,100b)
- EX_TABLE(52b,100b)
- EX_TABLE(53b,100b)
- EX_TABLE(54b,100b)
- EX_TABLE(55b,100b)
- EX_TABLE(56b,100b)
- EX_TABLE(57b,100b)
- EX_TABLE(58b,100b)
- EX_TABLE(59b,100b)
- EX_TABLE(60b,100b)
- EX_TABLE(61b,100b)
- EX_TABLE(62b,100b)
- EX_TABLE(63b,100b)
- EX_TABLE(64b,100b)
- EX_TABLE(65b,100b)
- EX_TABLE(66b,100b)
- EX_TABLE(67b,100b)
- EX_TABLE(68b,100b)
- EX_TABLE(69b,100b)
- EX_TABLE(70b,100b)
- EX_TABLE(71b,100b)
- EX_TABLE(72b,100b)
- EX_TABLE(73b,100b)
- EX_TABLE(74b,100b)
- EX_TABLE(75b,100b)
- EX_TABLE(76b,100b)
- EX_TABLE(77b,100b)
- EX_TABLE(78b,100b)
- EX_TABLE(79b,100b)
- EX_TABLE(80b,100b)
- EX_TABLE(81b,100b)
- EX_TABLE(82b,100b)
- EX_TABLE(83b,100b)
- EX_TABLE(84b,100b)
- EX_TABLE(85b,100b)
- EX_TABLE(86b,100b)
- EX_TABLE(87b,100b)
- EX_TABLE(88b,100b)
- EX_TABLE(89b,100b)
- EX_TABLE(90b,100b)
- EX_TABLE(91b,100b)
- EXPORT_SYMBOL(__copy_tofrom_user) /* export the entry-point symbol */
|