lib1funcs.S 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. /*
  2. * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
  3. *
  4. * Author: Nicolas Pitre <nico@fluxnic.net>
  5. * - contributed to gcc-3.4 on Sep 30, 2003
  6. * - adapted for the Linux kernel on Oct 2, 2003
  7. */
  8. /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  9. This file is free software; you can redistribute it and/or modify it
  10. under the terms of the GNU General Public License as published by the
  11. Free Software Foundation; either version 2, or (at your option) any
  12. later version.
  13. In addition to the permissions in the GNU General Public License, the
  14. Free Software Foundation gives you unlimited permission to link the
  15. compiled version of this file into combinations with other programs,
  16. and to distribute those combinations without any restriction coming
  17. from the use of this file. (The General Public License restrictions
  18. do apply in other respects; for example, they cover modification of
  19. the file, and distribution when not linked into a combined
  20. executable.)
  21. This file is distributed in the hope that it will be useful, but
  22. WITHOUT ANY WARRANTY; without even the implied warranty of
  23. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  24. General Public License for more details.
  25. You should have received a copy of the GNU General Public License
  26. along with this program; see the file COPYING. If not, write to
  27. the Free Software Foundation, 59 Temple Place - Suite 330,
  28. Boston, MA 02111-1307, USA. */
  29. #include <linux/linkage.h>
  30. #include <asm/assembler.h>
  31. #include <asm/unwind.h>
  32. #include <asm/export.h>
  /*
   * ARM_DIV_BODY dividend, divisor, result, curbit
   *
   * Unsigned shift-and-subtract division core.
   *   dividend - register holding the numerator; it is reduced in place
   *              and should be treated as clobbered afterwards
   *   divisor  - register holding the denominator; shifted in place
   *   result   - register that receives the quotient
   *   curbit   - scratch register marking the quotient bit under test
   *
   * The callers in this file branch away before expanding this macro when
   * the divisor is 0 or 1, when dividend <= divisor, or when the divisor is
   * a power of two, so the body is only reached with dividend > divisor.
   * The condition flags are changed.
   */
  33. .macro ARM_DIV_BODY dividend, divisor, result, curbit
  34. #if __LINUX_ARM_ARCH__ >= 5
  @ Architecture level 5 and up: clz yields the alignment shift directly.
  35. clz \curbit, \divisor
  36. clz \result, \dividend
  37. sub \result, \curbit, \result @ shift = clz(divisor) - clz(dividend)
  38. mov \curbit, #1
  39. mov \divisor, \divisor, lsl \result @ top set bit of divisor now level with that of dividend
  40. mov \curbit, \curbit, lsl \result @ quotient bit that corresponds to the shifted divisor
  41. mov \result, #0 @ quotient is accumulated from zero
  42. #else
  43. @ Initially shift the divisor left 3 bits if possible,
  44. @ set curbit accordingly. This allows for curbit to be located
  45. @ at the left end of each 4 bit nibbles in the division loop
  46. @ to save one loop in most cases.
  47. tst \divisor, #0xe0000000 @ eq when the top three bits are clear, so lsl #3 loses nothing
  48. moveq \divisor, \divisor, lsl #3
  49. moveq \curbit, #8
  50. movne \curbit, #1
  51. @ Unless the divisor is very big, shift it up in multiples of
  52. @ four bits, since this is the amount of unwinding in the main
  53. @ division loop. Continue shifting until the divisor is
  54. @ larger than the dividend.
  55. 1: cmp \divisor, #0x10000000
  56. cmplo \divisor, \dividend
  57. movlo \divisor, \divisor, lsl #4
  58. movlo \curbit, \curbit, lsl #4 @ keep curbit in step with the divisor
  59. blo 1b
  60. @ For very big divisors, we must shift it a bit at a time, or
  61. @ we will be in danger of overflowing.
  62. 1: cmp \divisor, #0x80000000
  63. cmplo \divisor, \dividend
  64. movlo \divisor, \divisor, lsl #1
  65. movlo \curbit, \curbit, lsl #1
  66. blo 1b
  67. mov \result, #0 @ quotient is accumulated from zero
  68. #endif
  69. @ Division loop
  @ Each pass is unrolled four times: the dividend is compared with the
  @ divisor shifted right by 0, 1, 2 and 3; whenever it is not lower
  @ (unsigned), that value is subtracted and the matching curbit position
  @ is ORed into the result.
  70. 1: cmp \dividend, \divisor
  71. subhs \dividend, \dividend, \divisor
  72. orrhs \result, \result, \curbit
  73. cmp \dividend, \divisor, lsr #1
  74. subhs \dividend, \dividend, \divisor, lsr #1
  75. orrhs \result, \result, \curbit, lsr #1
  76. cmp \dividend, \divisor, lsr #2
  77. subhs \dividend, \dividend, \divisor, lsr #2
  78. orrhs \result, \result, \curbit, lsr #2
  79. cmp \dividend, \divisor, lsr #3
  80. subhs \dividend, \dividend, \divisor, lsr #3
  81. orrhs \result, \result, \curbit, lsr #3
  82. cmp \dividend, #0 @ Early termination?
  83. movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
  84. movne \divisor, \divisor, lsr #4 @ move the divisor down with curbit for the next pass
  85. bne 1b @ repeat while dividend != 0 and curbit != 0
  86. .endm
  /*
   * ARM_DIV2_ORDER divisor, order
   *
   * Computes in order the bit position of the set bit of divisor, i.e.
   * log2(divisor) for a power-of-two divisor. Both callers in this file
   * only expand it after testing divisor & (divisor - 1) == 0, and then
   * use order as a right-shift count.
   *
   *   divisor - input register; overwritten on the pre-level-5 path
   *   order   - output register
   *
   * The pre-level-5 path changes the condition flags; the clz path does not.
   */
  87. .macro ARM_DIV2_ORDER divisor, order
  88. #if __LINUX_ARM_ARCH__ >= 5
  89. clz \order, \divisor
  90. rsb \order, \order, #31 @ order = 31 - clz(divisor)
  91. #else
  @ Without clz: binary search, folding the divisor down by 16, 8 and 4
  @ bits and adding each shift that was applied to order.
  92. cmp \divisor, #(1 << 16)
  93. movhs \divisor, \divisor, lsr #16
  94. movhs \order, #16
  95. movlo \order, #0
  96. cmp \divisor, #(1 << 8)
  97. movhs \divisor, \divisor, lsr #8
  98. addhs \order, \order, #8
  99. cmp \divisor, #(1 << 4)
  100. movhs \divisor, \divisor, lsr #4
  101. addhs \order, \order, #4
  @ The divisor is now below 16, so a power of two is 1, 2, 4 or 8.
  102. cmp \divisor, #(1 << 2)
  103. addhi \order, \order, #3 @ above 4, i.e. 8: three more bits
  104. addls \order, \order, \divisor, lsr #1 @ 1, 2 or 4 contribute 0, 1 or 2
  105. #endif
  106. .endm
  /*
   * ARM_MOD_BODY dividend, divisor, order, spare
   *
   * Unsigned shift-and-subtract remainder core. The divisor is shifted up
   * towards the dividend, then subtracted back out one bit position at a
   * time; what is left in dividend is the remainder that the callers in
   * this file return.
   *
   *   dividend - numerator on entry, remainder on exit
   *   divisor  - denominator; shifted in place
   *   order    - scratch: number of bit positions the divisor was shifted up
   *   spare    - scratch, written only on the clz path
   *
   * The callers in this file only reach this macro with dividend > divisor
   * and a divisor that is neither 0, 1 nor a power of two.
   * The condition flags are changed.
   */
  107. .macro ARM_MOD_BODY dividend, divisor, order, spare
  108. #if __LINUX_ARM_ARCH__ >= 5
  109. clz \order, \divisor
  110. clz \spare, \dividend
  111. sub \order, \order, \spare @ order = clz(divisor) - clz(dividend)
  112. mov \divisor, \divisor, lsl \order @ top set bit of divisor now level with that of dividend
  113. #else
  114. mov \order, #0
  115. @ Unless the divisor is very big, shift it up in multiples of
  116. @ four bits, since this is the amount of unwinding in the main
  117. @ division loop. Continue shifting until the divisor is
  118. @ larger than the dividend.
  119. 1: cmp \divisor, #0x10000000
  120. cmplo \divisor, \dividend
  121. movlo \divisor, \divisor, lsl #4
  122. addlo \order, \order, #4 @ count the positions shifted
  123. blo 1b
  124. @ For very big divisors, we must shift it a bit at a time, or
  125. @ we will be in danger of overflowing.
  126. 1: cmp \divisor, #0x80000000
  127. cmplo \divisor, \dividend
  128. movlo \divisor, \divisor, lsl #1
  129. addlo \order, \order, #1
  130. blo 1b
  131. #endif
  132. @ Perform all needed subtractions to keep only the remainder.
  133. @ Do comparisons in batch of 4 first.
  134. subs \order, \order, #3 @ yes, 3 is intended here
  135. blt 2f @ fewer than four steps needed: skip the unrolled loop
  136. 1: cmp \dividend, \divisor
  137. subhs \dividend, \dividend, \divisor
  138. cmp \dividend, \divisor, lsr #1
  139. subhs \dividend, \dividend, \divisor, lsr #1
  140. cmp \dividend, \divisor, lsr #2
  141. subhs \dividend, \dividend, \divisor, lsr #2
  142. cmp \dividend, \divisor, lsr #3
  143. subhs \dividend, \dividend, \divisor, lsr #3
  144. cmp \dividend, #1
  145. mov \divisor, \divisor, lsr #4 @ plain mov: leaves the flags of the cmp above intact
  146. subges \order, \order, #4 @ only counts down (and resets the flags) while dividend >= 1
  147. bge 1b
  @ Finished when no partial batch is pending (low two bits of order
  @ clear) or when nothing is left of the dividend.
  148. tst \order, #3
  149. teqne \dividend, #0
  150. beq 5f
  151. @ Either 1, 2 or 3 comparison/subtractions are left.
  152. 2: cmn \order, #2 @ flags for order + 2
  153. blt 4f @ order == -3: one step left
  154. beq 3f @ order == -2: two steps left
  155. cmp \dividend, \divisor
  156. subhs \dividend, \dividend, \divisor
  157. mov \divisor, \divisor, lsr #1
  158. 3: cmp \dividend, \divisor
  159. subhs \dividend, \dividend, \divisor
  160. mov \divisor, \divisor, lsr #1
  161. 4: cmp \dividend, \divisor
  162. subhs \dividend, \dividend, \divisor
  163. 5:
  164. .endm
  /*
   * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division (two entry names
   * for the same code).
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient
   * r1, r2 and r3 may be overwritten and the condition flags are changed.
   * A zero divisor branches to Ldiv0.
   *
   * NOTE(review): ret/reteq are not defined in this file; presumably they
   * are the return helpers from <asm/assembler.h> - confirm.
   * NOTE(review): the 8-byte alignment under CONFIG_ARM_PATCH_IDIV is
   * presumably required by code that patches this entry point - confirm.
   */
  165. #ifdef CONFIG_ARM_PATCH_IDIV
  166. .align 3
  167. #endif
  168. ENTRY(__udivsi3)
  169. ENTRY(__aeabi_uidiv)
  170. UNWIND(.fnstart)
  171. subs r2, r1, #1 @ r2 = divisor - 1; the flags classify the divisor
  172. reteq lr @ divisor == 1: the quotient is the dividend already in r0
  173. bcc Ldiv0 @ borrow out of the subtract means divisor == 0
  174. cmp r0, r1
  175. bls 11f @ dividend <= divisor: quotient is 0 or 1
  176. tst r1, r2 @ divisor & (divisor - 1) is zero for a power of two
  177. beq 12f
  @ General case: the quotient is produced in r2, with r3 as scratch.
  178. ARM_DIV_BODY r0, r1, r2, r3
  179. mov r0, r2
  180. ret lr
  @ Flags here still come from the cmp r0, r1 above.
  181. 11: moveq r0, #1 @ dividend == divisor
  182. movne r0, #0 @ dividend < divisor
  183. ret lr
  @ Power-of-two divisor: r2 = log2(divisor), then a plain right shift.
  184. 12: ARM_DIV2_ORDER r1, r2
  185. mov r0, r0, lsr r2
  186. ret lr
  187. UNWIND(.fnend)
  188. ENDPROC(__udivsi3)
  189. ENDPROC(__aeabi_uidiv)
  190. EXPORT_SYMBOL(__udivsi3)
  191. EXPORT_SYMBOL(__aeabi_uidiv)
  /*
   * __umodsi3: unsigned 32-bit remainder.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = dividend modulo divisor
   * r1, r2 and r3 may be overwritten and the condition flags are changed.
   * A zero divisor branches to Ldiv0.
   *
   * The conditional sequence below resolves every easy case without a
   * branch and returns through retls; only dividend > divisor with a
   * divisor that is not a power of two falls through to ARM_MOD_BODY.
   *
   * NOTE(review): ret/retls are not defined in this file; presumably they
   * are the return helpers from <asm/assembler.h> - confirm.
   */
  192. ENTRY(__umodsi3)
  193. UNWIND(.fnstart)
  194. subs r2, r1, #1 @ compare divisor with 1
  195. bcc Ldiv0
  196. cmpne r0, r1 @ compare dividend with divisor
  197. moveq r0, #0 @ divisor == 1 or dividend == divisor: remainder is 0
  198. tsthi r1, r2 @ see if divisor is power of 2
  199. andeq r0, r0, r2 @ power of two: keep the low bits (a no-op when r0 was just zeroed)
  200. retls lr @ also returns r0 untouched when dividend < divisor
  201. ARM_MOD_BODY r0, r1, r2, r3
  202. ret lr
  203. UNWIND(.fnend)
  204. ENDPROC(__umodsi3)
  205. EXPORT_SYMBOL(__umodsi3)
  /*
   * __divsi3 / __aeabi_idiv: signed 32-bit division (two entry names for
   * the same code).
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient
   * r1, r2, r3 and ip may be overwritten and the condition flags are
   * changed. A zero divisor branches to Ldiv0.
   *
   * The work is done on absolute values; ip keeps dividend ^ divisor so
   * that its sign bit tells whether the quotient has to be negated.
   *
   * NOTE(review): ret is not defined in this file; presumably it is the
   * return helper from <asm/assembler.h> - confirm.
   * NOTE(review): the 8-byte alignment under CONFIG_ARM_PATCH_IDIV is
   * presumably required by code that patches this entry point - confirm.
   */
  206. #ifdef CONFIG_ARM_PATCH_IDIV
  207. .align 3
  208. #endif
  209. ENTRY(__divsi3)
  210. ENTRY(__aeabi_idiv)
  211. UNWIND(.fnstart)
  212. cmp r1, #0
  213. eor ip, r0, r1 @ save the sign of the result.
  214. beq Ldiv0
  215. rsbmi r1, r1, #0 @ loops below use unsigned.
  216. subs r2, r1, #1 @ division by 1 or -1 ?
  217. beq 10f
  218. movs r3, r0
  219. rsbmi r3, r0, #0 @ positive dividend value
  220. cmp r3, r1
  221. bls 11f
  222. tst r1, r2 @ divisor is power of 2 ?
  223. beq 12f
  @ General case: unsigned quotient of r3 by r1 goes to r0, r2 is scratch.
  224. ARM_DIV_BODY r3, r1, r0, r2
  225. cmp ip, #0
  226. rsbmi r0, r0, #0 @ operand signs differed: negate the quotient
  227. ret lr
  @ Divisor was 1 or -1. r0 is still the original dividend, so ip ^ r0
  @ carries the sign bit of the original divisor.
  228. 10: teq ip, r0 @ same sign ?
  229. rsbmi r0, r0, #0
  230. ret lr
  @ Absolute dividend <= absolute divisor; flags come from cmp r3, r1.
  231. 11: movlo r0, #0 @ smaller magnitude: quotient is 0
  232. moveq r0, ip, asr #31 @ equal magnitude: 0 or -1 from the result sign
  233. orreq r0, r0, #1 @ ... which becomes +1 or -1
  234. ret lr
  @ Power-of-two divisor: r2 = log2 of it, shift the absolute dividend.
  235. 12: ARM_DIV2_ORDER r1, r2
  236. cmp ip, #0
  237. mov r0, r3, lsr r2 @ no flag update, so the cmp result survives
  238. rsbmi r0, r0, #0
  239. ret lr
  240. UNWIND(.fnend)
  241. ENDPROC(__divsi3)
  242. ENDPROC(__aeabi_idiv)
  243. EXPORT_SYMBOL(__divsi3)
  244. EXPORT_SYMBOL(__aeabi_idiv)
  /*
   * __modsi3: signed 32-bit remainder.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = remainder, negated when the original dividend was negative
   * r1, r2, r3 and ip may be overwritten and the condition flags are
   * changed. A zero divisor branches to Ldiv0.
   *
   * Both operands are made positive first; the unsigned part then follows
   * the same conditional sequence as __umodsi3.
   *
   * NOTE(review): ret is not defined in this file; presumably it is the
   * return helper from <asm/assembler.h> - confirm.
   */
  245. ENTRY(__modsi3)
  246. UNWIND(.fnstart)
  247. cmp r1, #0
  248. beq Ldiv0
  249. rsbmi r1, r1, #0 @ loops below use unsigned.
  250. movs ip, r0 @ preserve sign of dividend
  251. rsbmi r0, r0, #0 @ if negative make positive
  252. subs r2, r1, #1 @ compare divisor with 1
  253. cmpne r0, r1 @ compare dividend with divisor
  254. moveq r0, #0 @ divisor is 1 or magnitudes are equal: remainder is 0
  255. tsthi r1, r2 @ see if divisor is power of 2
  256. andeq r0, r0, r2 @ power of two: keep the low bits (a no-op when r0 was just zeroed)
  257. bls 10f @ every easy case is resolved; r0 is untouched when it was below the divisor
  258. ARM_MOD_BODY r0, r1, r2, r3
  259. 10: cmp ip, #0 @ sign of the original dividend
  260. rsbmi r0, r0, #0
  261. ret lr
  262. UNWIND(.fnend)
  263. ENDPROC(__modsi3)
  264. EXPORT_SYMBOL(__modsi3)
  265. #ifdef CONFIG_AEABI
  /*
   * __aeabi_uidivmod: unsigned division returning quotient and remainder.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient as returned by __aeabi_uidiv
   *      r1 = dividend - quotient * divisor
   * r2 and r3 are overwritten.
   *
   * NOTE(review): ip is pushed and popped without being used in between;
   * presumably it is there to keep the stack adjustment at 16 bytes -
   * confirm.
   */
  266. ENTRY(__aeabi_uidivmod)
  267. UNWIND(.fnstart)
  268. UNWIND(.save {r0, r1, ip, lr} )
  269. stmfd sp!, {r0, r1, ip, lr} @ keep both operands and lr across the call
  270. bl __aeabi_uidiv
  271. ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
  272. mul r3, r0, r2 @ r3 = quotient * divisor
  273. sub r1, r1, r3 @ r1 = what the division left over
  274. ret lr
  275. UNWIND(.fnend)
  276. ENDPROC(__aeabi_uidivmod)
  277. EXPORT_SYMBOL(__aeabi_uidivmod)
  /*
   * __aeabi_idivmod: signed division returning quotient and remainder.
   *
   * In:  r0 = dividend, r1 = divisor
   * Out: r0 = quotient as returned by __aeabi_idiv
   *      r1 = dividend - quotient * divisor
   * r2 and r3 are overwritten.
   *
   * NOTE(review): ip is pushed and popped without being used in between;
   * presumably it is there to keep the stack adjustment at 16 bytes -
   * confirm.
   */
  278. ENTRY(__aeabi_idivmod)
  279. UNWIND(.fnstart)
  280. UNWIND(.save {r0, r1, ip, lr} )
  281. stmfd sp!, {r0, r1, ip, lr} @ keep both operands and lr across the call
  282. bl __aeabi_idiv
  283. ldmfd sp!, {r1, r2, ip, lr} @ r1 = saved dividend, r2 = saved divisor
  284. mul r3, r0, r2 @ r3 = quotient * divisor
  285. sub r1, r1, r3 @ r1 = what the division left over
  286. ret lr
  287. UNWIND(.fnend)
  288. ENDPROC(__aeabi_idivmod)
  289. EXPORT_SYMBOL(__aeabi_idivmod)
  290. #endif
  /*
   * Ldiv0: shared divide-by-zero path.
   *
   * Reached by plain conditional branches (not calls) from the division
   * and modulo entry points in this file, so lr still holds the return
   * address of their caller. Calls __div0, which is not defined in this
   * file, and if that returns hands 0 back in r0 to that caller.
   */
  291. Ldiv0:
  292. UNWIND(.fnstart)
  293. UNWIND(.pad #4)
  294. UNWIND(.save {lr})
  295. str lr, [sp, #-8]! @ store lr at sp - 8 with writeback: 8 bytes, matching .pad #4 plus .save {lr}
  296. bl __div0
  297. mov r0, #0 @ About as wrong as it could be.
  298. ldr pc, [sp], #8 @ load the saved lr into pc and release the 8 bytes
  299. UNWIND(.fnend)
  300. ENDPROC(Ldiv0)