/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_BITOPS_H
#define _ASM_X86_BITOPS_H

/*
 * Copyright 1992, Linus Torvalds.
 *
 * Note: inlines with more than a single statement should be marked
 * __always_inline to avoid problems with older gcc's inlining heuristics.
 */

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <linux/compiler.h>
#include <asm/alternative.h>
#include <asm/rmwcc.h>
#include <asm/barrier.h>

#if BITS_PER_LONG == 32
# define _BITOPS_LONG_SHIFT 5
#elif BITS_PER_LONG == 64
# define _BITOPS_LONG_SHIFT 6
#else
# error "Unexpected BITS_PER_LONG"
#endif

#define BIT_64(n)		(U64_C(1) << (n))

/*
 * These have to be done with inline assembly: that way the bit-setting
 * is guaranteed to be atomic. All bit operations return 0 if the bit
 * was cleared before the operation and != 0 if it was not.
 *
 * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
 */

#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
/* Technically wrong, but this avoids compilation errors on some gcc
   versions. */
#define BITOP_ADDR(x) "=m" (*(volatile long *) (x))
#else
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
#endif

#define ADDR			BITOP_ADDR(addr)

/*
 * We do the locked ops that don't return the old value as
 * a mask operation on a byte.
 */
#define IS_IMMEDIATE(nr)		(__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr)	BITOP_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr)			(1 << ((nr) & 7))

/**
 * set_bit - Atomically set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * This function is atomic and may not be reordered. See __set_bit()
 * if you do not require the atomic guarantees.
 *
 * Note: there are no guarantees that this function will not be reordered
 * on non x86 architectures, so if you are writing portable code,
 * make sure not to rely on its reordering guarantees.
 *
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void
set_bit(long nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "orb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)CONST_MASK(nr))
                        : "memory");
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
                        : BITOP_ADDR(addr) : "Ir" (nr) : "memory");
        }
}
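
/*
 * Illustrative usage sketch (hypothetical caller, not part of this header):
 * set_bit() operates on an array of unsigned long, so it pairs naturally
 * with DECLARE_BITMAP() and test_bit():
 *
 *	DECLARE_BITMAP(irq_pending, 256);	// 256-bit bitmap
 *
 *	set_bit(42, irq_pending);		// atomic, LOCK-prefixed RMW
 *	if (test_bit(42, irq_pending))
 *		handle_pending(42);		// handle_pending() is hypothetical
 */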
/**
 * __set_bit - Set a bit in memory
 * @nr: the bit to set
 * @addr: the address to start counting from
 *
 * Unlike set_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static __always_inline void __set_bit(long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(bts) " %1,%0" : ADDR : "Ir" (nr) : "memory");
}

/**
 * clear_bit - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit() is atomic and may not be reordered. However, it does
 * not contain a memory barrier, so if it is used for locking purposes,
 * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
 * in order to ensure changes are visible on other processors.
 */
static __always_inline void
clear_bit(long nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "andb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)~CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
}
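
/*
 * Illustrative ordering sketch (hypothetical caller): when a clear_bit()
 * must be observed after earlier stores by other CPUs, add the barrier
 * explicitly, since clear_bit() is only atomic, not an ordering point:
 *
 *	shared->data = value;			// 'shared' is hypothetical
 *	smp_mb__before_atomic();		// order the store before the RMW
 *	clear_bit(BUSY_BIT, &shared->flags);	// BUSY_BIT is hypothetical
 */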
/*
 * clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * clear_bit_unlock() is atomic and implies release semantics before the
 * memory operation. It can be used for an unlock.
 */
static __always_inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        barrier();
        clear_bit(nr, addr);
}

static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(btr) " %1,%0" : ADDR : "Ir" (nr));
}

static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
{
        bool negative;

        asm volatile(LOCK_PREFIX "andb %2,%1"
                CC_SET(s)
                : CC_OUT(s) (negative), ADDR
                : "ir" ((char) ~(1 << nr)) : "memory");
        return negative;
}

// Let everybody know we have it
#define clear_bit_unlock_is_negative_byte clear_bit_unlock_is_negative_byte
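
/*
 * Illustrative semantics sketch (hypothetical caller): the helper above
 * atomically clears bit @nr (expected to lie in the low byte) and reports
 * whether bit 7 of that byte is set afterwards, so a single LOCK ANDB can
 * both release a lock bit and test a "waiters" bit kept in the same byte:
 *
 *	if (clear_bit_unlock_is_negative_byte(LOCK_BIT, &word))
 *		wake_waiters(&word);	// LOCK_BIT/wake_waiters are hypothetical
 */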
/*
 * __clear_bit_unlock - Clears a bit in memory
 * @nr: Bit to clear
 * @addr: Address to start counting from
 *
 * __clear_bit_unlock() is non-atomic and implies release semantics before
 * the memory operation. It can be used for an unlock if no other CPUs can
 * concurrently modify other bits in the word.
 *
 * No memory barrier is required here, because x86 cannot reorder stores past
 * older loads. Same principle as spin_unlock.
 */
static __always_inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
        barrier();
        __clear_bit(nr, addr);
}

/**
 * __change_bit - Toggle a bit in memory
 * @nr: the bit to change
 * @addr: the address to start counting from
 *
 * Unlike change_bit(), this function is non-atomic and may be reordered.
 * If it's called on the same region of memory simultaneously, the effect
 * may be that only one operation succeeds.
 */
static __always_inline void __change_bit(long nr, volatile unsigned long *addr)
{
        asm volatile(__ASM_SIZE(btc) " %1,%0" : ADDR : "Ir" (nr));
}

/**
 * change_bit - Toggle a bit in memory
 * @nr: Bit to change
 * @addr: Address to start counting from
 *
 * change_bit() is atomic and may not be reordered.
 * Note that @nr may be almost arbitrarily large; this function is not
 * restricted to acting on a single-word quantity.
 */
static __always_inline void change_bit(long nr, volatile unsigned long *addr)
{
        if (IS_IMMEDIATE(nr)) {
                asm volatile(LOCK_PREFIX "xorb %1,%0"
                        : CONST_MASK_ADDR(nr, addr)
                        : "iq" ((u8)CONST_MASK(nr)));
        } else {
                asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
                        : BITOP_ADDR(addr)
                        : "Ir" (nr));
        }
}

/**
 * test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, c, "Ir", nr);
}

/**
 * test_and_set_bit_lock - Set a bit and return its old value for lock
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This is the same as test_and_set_bit on x86.
 */
static __always_inline bool
test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
        return test_and_set_bit(nr, addr);
}
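
/*
 * Illustrative lock-bit sketch (hypothetical caller): test_and_set_bit_lock()
 * and clear_bit_unlock() pair up as a minimal spinning lock over one bit of
 * a flags word:
 *
 *	while (test_and_set_bit_lock(MY_LOCK_BIT, &flags))
 *		cpu_relax();			// spin until we win the bit
 *	// ... critical section ...
 *	clear_bit_unlock(MY_LOCK_BIT, &flags);	// MY_LOCK_BIT/flags hypothetical
 */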
/**
 * __test_and_set_bit - Set a bit and return its old value
 * @nr: Bit to set
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 */
static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm(__ASM_SIZE(bts) " %2,%1"
            CC_SET(c)
            : CC_OUT(c) (oldbit), ADDR
            : "Ir" (nr));
        return oldbit;
}

/**
 * test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btr), *addr, c, "Ir", nr);
}

/**
 * __test_and_clear_bit - Clear a bit and return its old value
 * @nr: Bit to clear
 * @addr: Address to count from
 *
 * This operation is non-atomic and can be reordered.
 * If two instances of this operation race, one can appear to succeed
 * but actually fail. You must protect multiple accesses with a lock.
 *
 * Note: the operation is performed atomically with respect to
 * the local CPU, but not other CPUs. Portable code should not
 * rely on this behaviour.
 * KVM relies on this behaviour on x86 for modifying memory that is also
 * accessed from a hypervisor on the same CPU if running in a VM: don't change
 * this without also updating arch/x86/kernel/kvm.c
 */
static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(btr) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr));
        return oldbit;
}

/* WARNING: non atomic and it can be reordered! */
static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(btc) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit), ADDR
                     : "Ir" (nr) : "memory");
        return oldbit;
}

/**
 * test_and_change_bit - Change a bit and return its old value
 * @nr: Bit to change
 * @addr: Address to count from
 *
 * This operation is atomic and cannot be reordered.
 * It also implies a memory barrier.
 */
static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr)
{
        return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr);
}

static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr)
{
        return ((1UL << (nr & (BITS_PER_LONG-1))) &
                (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}

static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
{
        bool oldbit;

        asm volatile(__ASM_SIZE(bt) " %2,%1"
                     CC_SET(c)
                     : CC_OUT(c) (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr));
        return oldbit;
}

#if 0 /* Fool kernel-doc since it doesn't do macros yet */
/**
 * test_bit - Determine whether a bit is set
 * @nr: bit number to test
 * @addr: Address to start counting from
 */
static bool test_bit(int nr, const volatile unsigned long *addr);
#endif

#define test_bit(nr, addr)			\
        (__builtin_constant_p((nr))		\
         ? constant_test_bit((nr), (addr))	\
         : variable_test_bit((nr), (addr)))
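
/*
 * Illustrative dispatch sketch (hypothetical values): the test_bit() macro
 * above folds constant bit numbers into a plain C mask test at compile time
 * and falls back to the BT instruction for variable ones:
 *
 *	test_bit(3, map);	// nr is constant: constant_test_bit(), pure C
 *	test_bit(n, map);	// nr is variable: variable_test_bit(), uses BT
 */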
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Undefined if no bit exists, so code should check against 0 first.
 */
static __always_inline unsigned long __ffs(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "rm" (word));
        return word;
}
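
/*
 * Illustrative result sketch (hypothetical caller): __ffs() is zero-based
 * and must only be called on a non-zero word, e.g.
 * __ffs(0x01) == 0, __ffs(0x08) == 3, __ffs(0xf0) == 4:
 *
 *	if (word)
 *		bit = __ffs(word);	// 'bit' and 'word' are hypothetical
 */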
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static __always_inline unsigned long ffz(unsigned long word)
{
        asm("rep; bsf %1,%0"
                : "=r" (word)
                : "r" (~word));
        return word;
}
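
/*
 * Illustrative result sketch: ffz() is simply __ffs() of the complement and
 * is likewise zero-based, e.g. ffz(0x00) == 0, ffz(0x01) == 1,
 * ffz(0x0f) == 4. Callers should guard against an all-ones word first:
 *
 *	if (word != ~0UL)
 *		free_slot = ffz(word);	// 'free_slot' is hypothetical
 */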
/*
 * __fls: find last set bit in word
 * @word: The word to search
 *
 * Undefined if no set bit exists, so code should check against 0 first.
 */
static __always_inline unsigned long __fls(unsigned long word)
{
        asm("bsr %1,%0"
            : "=r" (word)
            : "rm" (word));
        return word;
}

#undef ADDR

#ifdef __KERNEL__
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static __always_inline int ffs(int x)
{
        int r;

#ifdef CONFIG_X86_64
        /*
         * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before, except that the
         * top 32 bits will be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsfl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
        asm("bsfl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "r" (-1));
#else
        asm("bsfl %1,%0\n\t"
            "jnz 1f\n\t"
            "movl $-1,%0\n"
            "1:" : "=r" (r) : "rm" (x));
#endif
        return r + 1;
}
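
/*
 * Illustrative result sketch: ffs() is one-based and tolerates zero, unlike
 * __ffs(), e.g.
 *
 *	ffs(0) == 0, ffs(1) == 1, ffs(0x08) == 4;
 *	for nonzero x, ffs(x) == __ffs(x) + 1.
 */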
/**
 * fls - find last set bit in word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffs, but returns the position of the most significant set bit.
 *
 * fls(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 32.
 */
static __always_inline int fls(int x)
{
        int r;

#ifdef CONFIG_X86_64
        /*
         * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before, except that the
         * top 32 bits will be cleared.
         *
         * We cannot do this on 32 bits because at the very least some
         * 486 CPUs did not behave this way.
         */
        asm("bsrl %1,%0"
            : "=r" (r)
            : "rm" (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
        asm("bsrl %1,%0\n\t"
            "cmovzl %2,%0"
            : "=&r" (r) : "rm" (x), "rm" (-1));
#else
        asm("bsrl %1,%0\n\t"
            "jnz 1f\n\t"
            "movl $-1,%0\n"
            "1:" : "=r" (r) : "rm" (x));
#endif
        return r + 1;
}
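
/*
 * Illustrative result sketch: fls() is one-based from the most significant
 * set bit, e.g. fls(0) == 0, fls(1) == 1, fls(0x80000000) == 32, so
 * 1 << (fls(n) - 1) rounds a nonzero n down to a power of two.
 */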
/**
 * fls64 - find last set bit in a 64-bit word
 * @x: the word to search
 *
 * This is defined in a similar way as the libc and compiler builtin
 * ffsll, but returns the position of the most significant set bit.
 *
 * fls64(value) returns 0 if value is 0 or the position of the last
 * set bit if value is nonzero. The last (most significant) bit is
 * at position 64.
 */
#ifdef CONFIG_X86_64
static __always_inline int fls64(__u64 x)
{
        int bitpos = -1;
        /*
         * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
         * dest reg is undefined if x==0, but their CPU architect says its
         * value is written to set it to the same as before.
         */
        asm("bsrq %1,%q0"
            : "+r" (bitpos)
            : "rm" (x));
        return bitpos + 1;
}
#else
#include <asm-generic/bitops/fls64.h>
#endif
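
/*
 * Illustrative result sketch: fls64() mirrors fls() over 64 bits, e.g.
 * fls64(0) == 0, fls64(1) == 1, fls64(1ULL << 63) == 64, which makes it a
 * convenient building block for log2-style helpers on u64 values.
 */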
#include <asm-generic/bitops/find.h>

#include <asm-generic/bitops/sched.h>

#include <asm/arch_hweight.h>
#include <asm-generic/bitops/const_hweight.h>

#include <asm-generic/bitops/le.h>

#include <asm-generic/bitops/ext2-atomic-setbit.h>

#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */
  471. #endif /* _ASM_X86_BITOPS_H */