/*
 *  arch/xtensa/lib/strncpy_user.S
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Returns: -EFAULT if exception before terminator, N if the entire
 *  buffer filled, else strlen.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/errno.h>
#include <variant/core.h>
#include <asm/asmmacro.h>

/*
 * char *__strncpy_user(char *dst, const char *src, size_t len)
 *
 * Copies at most 'len' bytes from user/src to dst, stopping after the
 * NUL terminator.  Each user-space access is wrapped in EX(label) so a
 * fault branches to the .fixup handler, which returns -EFAULT.
 */

/* Per-byte masks used to scan a loaded word for an embedded NUL; the
 * byte order of the masks depends on endianness. */
#ifdef __XTENSA_EB__
# define MASK0 0xff000000
# define MASK1 0x00ff0000
# define MASK2 0x0000ff00
# define MASK3 0x000000ff
#else
# define MASK0 0x000000ff
# define MASK1 0x0000ff00
# define MASK2 0x00ff0000
# define MASK3 0xff000000
#endif

# Register use
#   a0/ return address
#   a1/ stack pointer
#   a2/ return value
#   a3/ src
#   a4/ len
#   a5/ mask0
#   a6/ mask1
#   a7/ mask2
#   a8/ mask3
#   a9/ tmp
#   a10/ tmp
#   a11/ dst
#   a12/ tmp

	.text
	.align	4
	.global	__strncpy_user
	.type	__strncpy_user, @function
__strncpy_user:
	entry	sp, 16			# minimal stack frame
	# a2/ dst, a3/ src, a4/ len
	mov	a11, a2			# leave dst in return value register
	beqz	a4, .Lret		# if len is zero
	movi	a5, MASK0		# mask for byte 0
	movi	a6, MASK1		# mask for byte 1
	movi	a7, MASK2		# mask for byte 2
	movi	a8, MASK3		# mask for byte 3
	bbsi.l	a3, 0, .Lsrc1mod2	# if only  8-bit aligned
	bbsi.l	a3, 1, .Lsrc2mod4	# if only 16-bit aligned

.Lsrcaligned:	# return here when src is word-aligned
	srli	a12, a4, 2		# number of loop iterations with 4B per loop
	movi	a9, 3
	bnone	a11, a9, .Laligned	# take word loop only if dst is aligned too
	j	.Ldstunaligned

.Lsrc1mod2:	# src address is odd
	EX(11f)	l8ui	a9, a3, 0	# get byte 0
	addi	a3, a3, 1		# advance src pointer
	EX(10f)	s8i	a9, a11, 0	# store byte 0
	beqz	a9, .Lret		# if byte 0 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	beqz	a4, .Lret		# if len is zero
	bbci.l	a3, 1, .Lsrcaligned	# if src is now word-aligned

.Lsrc2mod4:	# src address is 2 mod 4
	EX(11f)	l8ui	a9, a3, 0	# get byte 0
	/* 1-cycle interlock */
	EX(10f)	s8i	a9, a11, 0	# store byte 0
	beqz	a9, .Lret		# if byte 0 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	beqz	a4, .Lret		# if len is zero
	EX(11f)	l8ui	a9, a3, 1	# get byte 1
	addi	a3, a3, 2		# advance src pointer
	EX(10f)	s8i	a9, a11, 0	# store byte 1
	beqz	a9, .Lret		# if byte 1 is zero
	addi	a11, a11, 1		# advance dst pointer
	addi	a4, a4, -1		# decrement len
	bnez	a4, .Lsrcaligned	# if len is nonzero

.Lret:
	sub	a2, a11, a2		# compute strlen
	retw

/*
 * dst is word-aligned, src is word-aligned
 */
	.align	4			# 1 mod 4 alignment for LOOPNEZ
	.byte	0			# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a12, .Loop1done
#else
	beqz	a12, .Loop1done
	slli	a12, a12, 2
	add	a12, a12, a11		# a12 = end of last 4B chunk
#endif
.Loop1:
	EX(11f)	l32i	a9, a3, 0	# get word from src
	addi	a3, a3, 4		# advance src pointer
	bnone	a9, a5, .Lz0		# if byte 0 is zero
	bnone	a9, a6, .Lz1		# if byte 1 is zero
	bnone	a9, a7, .Lz2		# if byte 2 is zero
	EX(10f)	s32i	a9, a11, 0	# store word to dst
	bnone	a9, a8, .Lz3		# if byte 3 is zero
	addi	a11, a11, 4		# advance dst pointer
#if !XCHAL_HAVE_LOOPS
	blt	a11, a12, .Loop1
#endif

.Loop1done:
	bbci.l	a4, 1, .L100
	# copy 2 bytes
	EX(11f)	l16ui	a9, a3, 0
	addi	a3, a3, 2		# advance src pointer
#ifdef __XTENSA_EB__
	bnone	a9, a7, .Lz0		# if byte 2 is zero
	bnone	a9, a8, .Lz1		# if byte 3 is zero
#else
	bnone	a9, a5, .Lz0		# if byte 0 is zero
	bnone	a9, a6, .Lz1		# if byte 1 is zero
#endif
	EX(10f)	s16i	a9, a11, 0
	addi	a11, a11, 2		# advance dst pointer
.L100:
	bbci.l	a4, 0, .Lret
	EX(11f)	l8ui	a9, a3, 0
	/* slot */
	EX(10f)	s8i	a9, a11, 0
	beqz	a9, .Lret		# if byte is zero
	addi	a11, a11, 1-3		# advance dst ptr 1, but also cancel
					# the effect of adding 3 in .Lz3 code
	/* fall thru to .Lz3 and "retw" */

.Lz3:	# byte 3 is zero
	addi	a11, a11, 3		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	retw

.Lz0:	# byte 0 is zero
#ifdef __XTENSA_EB__
	movi	a9, 0			# on BE the NUL is not in the low byte
#endif /* __XTENSA_EB__ */
	EX(10f)	s8i	a9, a11, 0
	sub	a2, a11, a2		# compute strlen
	retw

.Lz1:	# byte 1 is zero
#ifdef __XTENSA_EB__
	extui	a9, a9, 16, 16		# move bytes 0..1 into the low halfword
#endif /* __XTENSA_EB__ */
	EX(10f)	s16i	a9, a11, 0
	addi	a11, a11, 1		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	retw

.Lz2:	# byte 2 is zero
#ifdef __XTENSA_EB__
	extui	a9, a9, 16, 16		# move bytes 0..1 into the low halfword
#endif /* __XTENSA_EB__ */
	EX(10f)	s16i	a9, a11, 0
	movi	a9, 0
	EX(10f)	s8i	a9, a11, 2	# store the terminating NUL explicitly
	addi	a11, a11, 2		# advance dst pointer
	sub	a2, a11, a2		# compute strlen
	retw

	.align	4			# 1 mod 4 alignment for LOOPNEZ
	.byte	0			# (0 mod 4 alignment for LBEG)
.Ldstunaligned:
/*
 * for now just use byte copy loop
 */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lunalignedend
#else
	beqz	a4, .Lunalignedend
	add	a12, a11, a4		# a12 = ending address
#endif /* XCHAL_HAVE_LOOPS */
.Lnextbyte:
	EX(11f)	l8ui	a9, a3, 0
	addi	a3, a3, 1
	EX(10f)	s8i	a9, a11, 0
	beqz	a9, .Lunalignedend
	addi	a11, a11, 1
#if !XCHAL_HAVE_LOOPS
	blt	a11, a12, .Lnextbyte
#endif

.Lunalignedend:
	sub	a2, a11, a2		# compute strlen
	retw

	.section .fixup, "ax"
	.align	4

	/* For now, just return -EFAULT.  Future implementations might
	 * like to clear remaining kernel space, like the fixup
	 * implementation in memset().  Thus, we differentiate between
	 * load/store fixups. */

10:
11:
	movi	a2, -EFAULT
	retw