fixpt31_32.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700
  1. /*
  2. * Copyright 2012-15 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. * Authors: AMD
  23. *
  24. */
  25. #include "dm_services.h"
  26. #include "include/fixed31_32.h"
  27. static inline uint64_t abs_i64(
  28. int64_t arg)
  29. {
  30. if (arg > 0)
  31. return (uint64_t)arg;
  32. else
  33. return (uint64_t)(-arg);
  34. }
  35. /*
  36. * @brief
  37. * result = dividend / divisor
  38. * *remainder = dividend % divisor
  39. */
  40. static inline uint64_t complete_integer_division_u64(
  41. uint64_t dividend,
  42. uint64_t divisor,
  43. uint64_t *remainder)
  44. {
  45. uint64_t result;
  46. ASSERT(divisor);
  47. result = div64_u64_rem(dividend, divisor, remainder);
  48. return result;
  49. }
  50. #define BITS_PER_FRACTIONAL_PART \
  51. 32
  52. #define FRACTIONAL_PART_MASK \
  53. ((1ULL << BITS_PER_FRACTIONAL_PART) - 1)
  54. #define GET_INTEGER_PART(x) \
  55. ((x) >> BITS_PER_FRACTIONAL_PART)
  56. #define GET_FRACTIONAL_PART(x) \
  57. (FRACTIONAL_PART_MASK & (x))
  58. struct fixed31_32 dal_fixed31_32_from_fraction(
  59. int64_t numerator,
  60. int64_t denominator)
  61. {
  62. struct fixed31_32 res;
  63. bool arg1_negative = numerator < 0;
  64. bool arg2_negative = denominator < 0;
  65. uint64_t arg1_value = arg1_negative ? -numerator : numerator;
  66. uint64_t arg2_value = arg2_negative ? -denominator : denominator;
  67. uint64_t remainder;
  68. /* determine integer part */
  69. uint64_t res_value = complete_integer_division_u64(
  70. arg1_value, arg2_value, &remainder);
  71. ASSERT(res_value <= LONG_MAX);
  72. /* determine fractional part */
  73. {
  74. uint32_t i = BITS_PER_FRACTIONAL_PART;
  75. do {
  76. remainder <<= 1;
  77. res_value <<= 1;
  78. if (remainder >= arg2_value) {
  79. res_value |= 1;
  80. remainder -= arg2_value;
  81. }
  82. } while (--i != 0);
  83. }
  84. /* round up LSB */
  85. {
  86. uint64_t summand = (remainder << 1) >= arg2_value;
  87. ASSERT(res_value <= LLONG_MAX - summand);
  88. res_value += summand;
  89. }
  90. res.value = (int64_t)res_value;
  91. if (arg1_negative ^ arg2_negative)
  92. res.value = -res.value;
  93. return res;
  94. }
  95. struct fixed31_32 dal_fixed31_32_from_int(
  96. int64_t arg)
  97. {
  98. struct fixed31_32 res;
  99. ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX));
  100. res.value = arg << BITS_PER_FRACTIONAL_PART;
  101. return res;
  102. }
  103. struct fixed31_32 dal_fixed31_32_neg(
  104. struct fixed31_32 arg)
  105. {
  106. struct fixed31_32 res;
  107. res.value = -arg.value;
  108. return res;
  109. }
  110. struct fixed31_32 dal_fixed31_32_abs(
  111. struct fixed31_32 arg)
  112. {
  113. if (arg.value < 0)
  114. return dal_fixed31_32_neg(arg);
  115. else
  116. return arg;
  117. }
  118. bool dal_fixed31_32_lt(
  119. struct fixed31_32 arg1,
  120. struct fixed31_32 arg2)
  121. {
  122. return arg1.value < arg2.value;
  123. }
  124. bool dal_fixed31_32_le(
  125. struct fixed31_32 arg1,
  126. struct fixed31_32 arg2)
  127. {
  128. return arg1.value <= arg2.value;
  129. }
  130. bool dal_fixed31_32_eq(
  131. struct fixed31_32 arg1,
  132. struct fixed31_32 arg2)
  133. {
  134. return arg1.value == arg2.value;
  135. }
  136. struct fixed31_32 dal_fixed31_32_min(
  137. struct fixed31_32 arg1,
  138. struct fixed31_32 arg2)
  139. {
  140. if (arg1.value <= arg2.value)
  141. return arg1;
  142. else
  143. return arg2;
  144. }
  145. struct fixed31_32 dal_fixed31_32_max(
  146. struct fixed31_32 arg1,
  147. struct fixed31_32 arg2)
  148. {
  149. if (arg1.value <= arg2.value)
  150. return arg2;
  151. else
  152. return arg1;
  153. }
  154. struct fixed31_32 dal_fixed31_32_clamp(
  155. struct fixed31_32 arg,
  156. struct fixed31_32 min_value,
  157. struct fixed31_32 max_value)
  158. {
  159. if (dal_fixed31_32_le(arg, min_value))
  160. return min_value;
  161. else if (dal_fixed31_32_le(max_value, arg))
  162. return max_value;
  163. else
  164. return arg;
  165. }
  166. struct fixed31_32 dal_fixed31_32_shl(
  167. struct fixed31_32 arg,
  168. uint8_t shift)
  169. {
  170. struct fixed31_32 res;
  171. ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
  172. ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift)));
  173. res.value = arg.value << shift;
  174. return res;
  175. }
  176. struct fixed31_32 dal_fixed31_32_shr(
  177. struct fixed31_32 arg,
  178. uint8_t shift)
  179. {
  180. struct fixed31_32 res;
  181. ASSERT(shift < 64);
  182. res.value = arg.value >> shift;
  183. return res;
  184. }
  185. struct fixed31_32 dal_fixed31_32_add(
  186. struct fixed31_32 arg1,
  187. struct fixed31_32 arg2)
  188. {
  189. struct fixed31_32 res;
  190. ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
  191. ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
  192. res.value = arg1.value + arg2.value;
  193. return res;
  194. }
  195. struct fixed31_32 dal_fixed31_32_add_int(
  196. struct fixed31_32 arg1,
  197. int32_t arg2)
  198. {
  199. return dal_fixed31_32_add(
  200. arg1,
  201. dal_fixed31_32_from_int(arg2));
  202. }
  203. struct fixed31_32 dal_fixed31_32_sub_int(
  204. struct fixed31_32 arg1,
  205. int32_t arg2)
  206. {
  207. return dal_fixed31_32_sub(
  208. arg1,
  209. dal_fixed31_32_from_int(arg2));
  210. }
  211. struct fixed31_32 dal_fixed31_32_sub(
  212. struct fixed31_32 arg1,
  213. struct fixed31_32 arg2)
  214. {
  215. struct fixed31_32 res;
  216. ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
  217. ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
  218. res.value = arg1.value - arg2.value;
  219. return res;
  220. }
  221. struct fixed31_32 dal_fixed31_32_mul_int(
  222. struct fixed31_32 arg1,
  223. int32_t arg2)
  224. {
  225. return dal_fixed31_32_mul(
  226. arg1,
  227. dal_fixed31_32_from_int(arg2));
  228. }
  229. struct fixed31_32 dal_fixed31_32_mul(
  230. struct fixed31_32 arg1,
  231. struct fixed31_32 arg2)
  232. {
  233. struct fixed31_32 res;
  234. bool arg1_negative = arg1.value < 0;
  235. bool arg2_negative = arg2.value < 0;
  236. uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value;
  237. uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value;
  238. uint64_t arg1_int = GET_INTEGER_PART(arg1_value);
  239. uint64_t arg2_int = GET_INTEGER_PART(arg2_value);
  240. uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value);
  241. uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value);
  242. uint64_t tmp;
  243. res.value = arg1_int * arg2_int;
  244. ASSERT(res.value <= LONG_MAX);
  245. res.value <<= BITS_PER_FRACTIONAL_PART;
  246. tmp = arg1_int * arg2_fra;
  247. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  248. res.value += tmp;
  249. tmp = arg2_int * arg1_fra;
  250. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  251. res.value += tmp;
  252. tmp = arg1_fra * arg2_fra;
  253. tmp = (tmp >> BITS_PER_FRACTIONAL_PART) +
  254. (tmp >= (uint64_t)dal_fixed31_32_half.value);
  255. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  256. res.value += tmp;
  257. if (arg1_negative ^ arg2_negative)
  258. res.value = -res.value;
  259. return res;
  260. }
  261. struct fixed31_32 dal_fixed31_32_sqr(
  262. struct fixed31_32 arg)
  263. {
  264. struct fixed31_32 res;
  265. uint64_t arg_value = abs_i64(arg.value);
  266. uint64_t arg_int = GET_INTEGER_PART(arg_value);
  267. uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value);
  268. uint64_t tmp;
  269. res.value = arg_int * arg_int;
  270. ASSERT(res.value <= LONG_MAX);
  271. res.value <<= BITS_PER_FRACTIONAL_PART;
  272. tmp = arg_int * arg_fra;
  273. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  274. res.value += tmp;
  275. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  276. res.value += tmp;
  277. tmp = arg_fra * arg_fra;
  278. tmp = (tmp >> BITS_PER_FRACTIONAL_PART) +
  279. (tmp >= (uint64_t)dal_fixed31_32_half.value);
  280. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  281. res.value += tmp;
  282. return res;
  283. }
  284. struct fixed31_32 dal_fixed31_32_div_int(
  285. struct fixed31_32 arg1,
  286. int64_t arg2)
  287. {
  288. return dal_fixed31_32_from_fraction(
  289. arg1.value,
  290. dal_fixed31_32_from_int(arg2).value);
  291. }
  292. struct fixed31_32 dal_fixed31_32_div(
  293. struct fixed31_32 arg1,
  294. struct fixed31_32 arg2)
  295. {
  296. return dal_fixed31_32_from_fraction(
  297. arg1.value,
  298. arg2.value);
  299. }
  300. struct fixed31_32 dal_fixed31_32_recip(
  301. struct fixed31_32 arg)
  302. {
  303. /*
  304. * @note
  305. * Good idea to use Newton's method
  306. */
  307. ASSERT(arg.value);
  308. return dal_fixed31_32_from_fraction(
  309. dal_fixed31_32_one.value,
  310. arg.value);
  311. }
  312. struct fixed31_32 dal_fixed31_32_sinc(
  313. struct fixed31_32 arg)
  314. {
  315. struct fixed31_32 square;
  316. struct fixed31_32 res = dal_fixed31_32_one;
  317. int32_t n = 27;
  318. struct fixed31_32 arg_norm = arg;
  319. if (dal_fixed31_32_le(
  320. dal_fixed31_32_two_pi,
  321. dal_fixed31_32_abs(arg))) {
  322. arg_norm = dal_fixed31_32_sub(
  323. arg_norm,
  324. dal_fixed31_32_mul_int(
  325. dal_fixed31_32_two_pi,
  326. (int32_t)div64_s64(
  327. arg_norm.value,
  328. dal_fixed31_32_two_pi.value)));
  329. }
  330. square = dal_fixed31_32_sqr(arg_norm);
  331. do {
  332. res = dal_fixed31_32_sub(
  333. dal_fixed31_32_one,
  334. dal_fixed31_32_div_int(
  335. dal_fixed31_32_mul(
  336. square,
  337. res),
  338. n * (n - 1)));
  339. n -= 2;
  340. } while (n > 2);
  341. if (arg.value != arg_norm.value)
  342. res = dal_fixed31_32_div(
  343. dal_fixed31_32_mul(res, arg_norm),
  344. arg);
  345. return res;
  346. }
  347. struct fixed31_32 dal_fixed31_32_sin(
  348. struct fixed31_32 arg)
  349. {
  350. return dal_fixed31_32_mul(
  351. arg,
  352. dal_fixed31_32_sinc(arg));
  353. }
  354. struct fixed31_32 dal_fixed31_32_cos(
  355. struct fixed31_32 arg)
  356. {
  357. /* TODO implement argument normalization */
  358. const struct fixed31_32 square = dal_fixed31_32_sqr(arg);
  359. struct fixed31_32 res = dal_fixed31_32_one;
  360. int32_t n = 26;
  361. do {
  362. res = dal_fixed31_32_sub(
  363. dal_fixed31_32_one,
  364. dal_fixed31_32_div_int(
  365. dal_fixed31_32_mul(
  366. square,
  367. res),
  368. n * (n - 1)));
  369. n -= 2;
  370. } while (n != 0);
  371. return res;
  372. }
  373. /*
  374. * @brief
  375. * result = exp(arg),
  376. * where abs(arg) < 1
  377. *
  378. * Calculated as Taylor series.
  379. */
  380. static struct fixed31_32 fixed31_32_exp_from_taylor_series(
  381. struct fixed31_32 arg)
  382. {
  383. uint32_t n = 9;
  384. struct fixed31_32 res = dal_fixed31_32_from_fraction(
  385. n + 2,
  386. n + 1);
  387. /* TODO find correct res */
  388. ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one));
  389. do
  390. res = dal_fixed31_32_add(
  391. dal_fixed31_32_one,
  392. dal_fixed31_32_div_int(
  393. dal_fixed31_32_mul(
  394. arg,
  395. res),
  396. n));
  397. while (--n != 1);
  398. return dal_fixed31_32_add(
  399. dal_fixed31_32_one,
  400. dal_fixed31_32_mul(
  401. arg,
  402. res));
  403. }
  404. struct fixed31_32 dal_fixed31_32_exp(
  405. struct fixed31_32 arg)
  406. {
  407. /*
  408. * @brief
  409. * Main equation is:
  410. * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
  411. * where m = round(x / ln(2)), r = x - m * ln(2)
  412. */
  413. if (dal_fixed31_32_le(
  414. dal_fixed31_32_ln2_div_2,
  415. dal_fixed31_32_abs(arg))) {
  416. int32_t m = dal_fixed31_32_round(
  417. dal_fixed31_32_div(
  418. arg,
  419. dal_fixed31_32_ln2));
  420. struct fixed31_32 r = dal_fixed31_32_sub(
  421. arg,
  422. dal_fixed31_32_mul_int(
  423. dal_fixed31_32_ln2,
  424. m));
  425. ASSERT(m != 0);
  426. ASSERT(dal_fixed31_32_lt(
  427. dal_fixed31_32_abs(r),
  428. dal_fixed31_32_one));
  429. if (m > 0)
  430. return dal_fixed31_32_shl(
  431. fixed31_32_exp_from_taylor_series(r),
  432. (uint8_t)m);
  433. else
  434. return dal_fixed31_32_div_int(
  435. fixed31_32_exp_from_taylor_series(r),
  436. 1LL << -m);
  437. } else if (arg.value != 0)
  438. return fixed31_32_exp_from_taylor_series(arg);
  439. else
  440. return dal_fixed31_32_one;
  441. }
  442. struct fixed31_32 dal_fixed31_32_log(
  443. struct fixed31_32 arg)
  444. {
  445. struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one);
  446. /* TODO improve 1st estimation */
  447. struct fixed31_32 error;
  448. ASSERT(arg.value > 0);
  449. /* TODO if arg is negative, return NaN */
  450. /* TODO if arg is zero, return -INF */
  451. do {
  452. struct fixed31_32 res1 = dal_fixed31_32_add(
  453. dal_fixed31_32_sub(
  454. res,
  455. dal_fixed31_32_one),
  456. dal_fixed31_32_div(
  457. arg,
  458. dal_fixed31_32_exp(res)));
  459. error = dal_fixed31_32_sub(
  460. res,
  461. res1);
  462. res = res1;
  463. /* TODO determine max_allowed_error based on quality of exp() */
  464. } while (abs_i64(error.value) > 100ULL);
  465. return res;
  466. }
  467. struct fixed31_32 dal_fixed31_32_pow(
  468. struct fixed31_32 arg1,
  469. struct fixed31_32 arg2)
  470. {
  471. return dal_fixed31_32_exp(
  472. dal_fixed31_32_mul(
  473. dal_fixed31_32_log(arg1),
  474. arg2));
  475. }
  476. int32_t dal_fixed31_32_floor(
  477. struct fixed31_32 arg)
  478. {
  479. uint64_t arg_value = abs_i64(arg.value);
  480. if (arg.value >= 0)
  481. return (int32_t)GET_INTEGER_PART(arg_value);
  482. else
  483. return -(int32_t)GET_INTEGER_PART(arg_value);
  484. }
  485. int32_t dal_fixed31_32_round(
  486. struct fixed31_32 arg)
  487. {
  488. uint64_t arg_value = abs_i64(arg.value);
  489. const int64_t summand = dal_fixed31_32_half.value;
  490. ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
  491. arg_value += summand;
  492. if (arg.value >= 0)
  493. return (int32_t)GET_INTEGER_PART(arg_value);
  494. else
  495. return -(int32_t)GET_INTEGER_PART(arg_value);
  496. }
  497. int32_t dal_fixed31_32_ceil(
  498. struct fixed31_32 arg)
  499. {
  500. uint64_t arg_value = abs_i64(arg.value);
  501. const int64_t summand = dal_fixed31_32_one.value -
  502. dal_fixed31_32_epsilon.value;
  503. ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
  504. arg_value += summand;
  505. if (arg.value >= 0)
  506. return (int32_t)GET_INTEGER_PART(arg_value);
  507. else
  508. return -(int32_t)GET_INTEGER_PART(arg_value);
  509. }
  510. /* this function is a generic helper to translate fixed point value to
  511. * specified integer format that will consist of integer_bits integer part and
  512. * fractional_bits fractional part. For example it is used in
  513. * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional
  514. * part in 32 bits. It is used in hw programming (scaler)
  515. */
  516. static inline uint32_t ux_dy(
  517. int64_t value,
  518. uint32_t integer_bits,
  519. uint32_t fractional_bits)
  520. {
  521. /* 1. create mask of integer part */
  522. uint32_t result = (1 << integer_bits) - 1;
  523. /* 2. mask out fractional part */
  524. uint32_t fractional_part = FRACTIONAL_PART_MASK & value;
  525. /* 3. shrink fixed point integer part to be of integer_bits width*/
  526. result &= GET_INTEGER_PART(value);
  527. /* 4. make space for fractional part to be filled in after integer */
  528. result <<= fractional_bits;
  529. /* 5. shrink fixed point fractional part to of fractional_bits width*/
  530. fractional_part >>= BITS_PER_FRACTIONAL_PART - fractional_bits;
  531. /* 6. merge the result */
  532. return result | fractional_part;
  533. }
  534. uint32_t dal_fixed31_32_u2d19(
  535. struct fixed31_32 arg)
  536. {
  537. return ux_dy(arg.value, 2, 19);
  538. }
  539. uint32_t dal_fixed31_32_u0d19(
  540. struct fixed31_32 arg)
  541. {
  542. return ux_dy(arg.value, 0, 19);
  543. }