fixpt31_32.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691
  1. /*
  2. * Copyright 2012-15 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. * Authors: AMD
  23. *
  24. */
  25. #include "dm_services.h"
  26. #include "include/fixed31_32.h"
  27. static inline uint64_t abs_i64(
  28. int64_t arg)
  29. {
  30. if (arg > 0)
  31. return (uint64_t)arg;
  32. else
  33. return (uint64_t)(-arg);
  34. }
  35. /*
  36. * @brief
  37. * result = dividend / divisor
  38. * *remainder = dividend % divisor
  39. */
  40. static inline uint64_t complete_integer_division_u64(
  41. uint64_t dividend,
  42. uint64_t divisor,
  43. uint64_t *remainder)
  44. {
  45. uint64_t result;
  46. ASSERT(divisor);
  47. result = div64_u64_rem(dividend, divisor, remainder);
  48. return result;
  49. }
  50. #define BITS_PER_FRACTIONAL_PART \
  51. 32
  52. #define FRACTIONAL_PART_MASK \
  53. ((1ULL << BITS_PER_FRACTIONAL_PART) - 1)
  54. #define GET_INTEGER_PART(x) \
  55. ((x) >> BITS_PER_FRACTIONAL_PART)
  56. #define GET_FRACTIONAL_PART(x) \
  57. (FRACTIONAL_PART_MASK & (x))
  58. struct fixed31_32 dal_fixed31_32_from_fraction(
  59. int64_t numerator,
  60. int64_t denominator)
  61. {
  62. struct fixed31_32 res;
  63. bool arg1_negative = numerator < 0;
  64. bool arg2_negative = denominator < 0;
  65. uint64_t arg1_value = arg1_negative ? -numerator : numerator;
  66. uint64_t arg2_value = arg2_negative ? -denominator : denominator;
  67. uint64_t remainder;
  68. /* determine integer part */
  69. uint64_t res_value = complete_integer_division_u64(
  70. arg1_value, arg2_value, &remainder);
  71. ASSERT(res_value <= LONG_MAX);
  72. /* determine fractional part */
  73. {
  74. uint32_t i = BITS_PER_FRACTIONAL_PART;
  75. do {
  76. remainder <<= 1;
  77. res_value <<= 1;
  78. if (remainder >= arg2_value) {
  79. res_value |= 1;
  80. remainder -= arg2_value;
  81. }
  82. } while (--i != 0);
  83. }
  84. /* round up LSB */
  85. {
  86. uint64_t summand = (remainder << 1) >= arg2_value;
  87. ASSERT(res_value <= LLONG_MAX - summand);
  88. res_value += summand;
  89. }
  90. res.value = (int64_t)res_value;
  91. if (arg1_negative ^ arg2_negative)
  92. res.value = -res.value;
  93. return res;
  94. }
  95. struct fixed31_32 dal_fixed31_32_from_int(
  96. int64_t arg)
  97. {
  98. struct fixed31_32 res;
  99. ASSERT((LONG_MIN <= arg) && (arg <= LONG_MAX));
  100. res.value = arg << BITS_PER_FRACTIONAL_PART;
  101. return res;
  102. }
  103. struct fixed31_32 dal_fixed31_32_neg(
  104. struct fixed31_32 arg)
  105. {
  106. struct fixed31_32 res;
  107. res.value = -arg.value;
  108. return res;
  109. }
  110. struct fixed31_32 dal_fixed31_32_abs(
  111. struct fixed31_32 arg)
  112. {
  113. if (arg.value < 0)
  114. return dal_fixed31_32_neg(arg);
  115. else
  116. return arg;
  117. }
  118. bool dal_fixed31_32_lt(
  119. struct fixed31_32 arg1,
  120. struct fixed31_32 arg2)
  121. {
  122. return arg1.value < arg2.value;
  123. }
  124. bool dal_fixed31_32_le(
  125. struct fixed31_32 arg1,
  126. struct fixed31_32 arg2)
  127. {
  128. return arg1.value <= arg2.value;
  129. }
  130. bool dal_fixed31_32_eq(
  131. struct fixed31_32 arg1,
  132. struct fixed31_32 arg2)
  133. {
  134. return arg1.value == arg2.value;
  135. }
  136. struct fixed31_32 dal_fixed31_32_min(
  137. struct fixed31_32 arg1,
  138. struct fixed31_32 arg2)
  139. {
  140. if (arg1.value <= arg2.value)
  141. return arg1;
  142. else
  143. return arg2;
  144. }
  145. struct fixed31_32 dal_fixed31_32_max(
  146. struct fixed31_32 arg1,
  147. struct fixed31_32 arg2)
  148. {
  149. if (arg1.value <= arg2.value)
  150. return arg2;
  151. else
  152. return arg1;
  153. }
  154. struct fixed31_32 dal_fixed31_32_clamp(
  155. struct fixed31_32 arg,
  156. struct fixed31_32 min_value,
  157. struct fixed31_32 max_value)
  158. {
  159. if (dal_fixed31_32_le(arg, min_value))
  160. return min_value;
  161. else if (dal_fixed31_32_le(max_value, arg))
  162. return max_value;
  163. else
  164. return arg;
  165. }
  166. struct fixed31_32 dal_fixed31_32_shl(
  167. struct fixed31_32 arg,
  168. uint8_t shift)
  169. {
  170. struct fixed31_32 res;
  171. ASSERT(((arg.value >= 0) && (arg.value <= LLONG_MAX >> shift)) ||
  172. ((arg.value < 0) && (arg.value >= LLONG_MIN >> shift)));
  173. res.value = arg.value << shift;
  174. return res;
  175. }
  176. struct fixed31_32 dal_fixed31_32_shr(
  177. struct fixed31_32 arg,
  178. uint8_t shift)
  179. {
  180. struct fixed31_32 res;
  181. ASSERT(shift < 64);
  182. res.value = arg.value >> shift;
  183. return res;
  184. }
  185. struct fixed31_32 dal_fixed31_32_add(
  186. struct fixed31_32 arg1,
  187. struct fixed31_32 arg2)
  188. {
  189. struct fixed31_32 res;
  190. ASSERT(((arg1.value >= 0) && (LLONG_MAX - arg1.value >= arg2.value)) ||
  191. ((arg1.value < 0) && (LLONG_MIN - arg1.value <= arg2.value)));
  192. res.value = arg1.value + arg2.value;
  193. return res;
  194. }
  195. struct fixed31_32 dal_fixed31_32_sub_int(
  196. struct fixed31_32 arg1,
  197. int32_t arg2)
  198. {
  199. return dal_fixed31_32_sub(
  200. arg1,
  201. dal_fixed31_32_from_int(arg2));
  202. }
  203. struct fixed31_32 dal_fixed31_32_sub(
  204. struct fixed31_32 arg1,
  205. struct fixed31_32 arg2)
  206. {
  207. struct fixed31_32 res;
  208. ASSERT(((arg2.value >= 0) && (LLONG_MIN + arg2.value <= arg1.value)) ||
  209. ((arg2.value < 0) && (LLONG_MAX + arg2.value >= arg1.value)));
  210. res.value = arg1.value - arg2.value;
  211. return res;
  212. }
  213. struct fixed31_32 dal_fixed31_32_mul_int(
  214. struct fixed31_32 arg1,
  215. int32_t arg2)
  216. {
  217. return dal_fixed31_32_mul(
  218. arg1,
  219. dal_fixed31_32_from_int(arg2));
  220. }
  221. struct fixed31_32 dal_fixed31_32_mul(
  222. struct fixed31_32 arg1,
  223. struct fixed31_32 arg2)
  224. {
  225. struct fixed31_32 res;
  226. bool arg1_negative = arg1.value < 0;
  227. bool arg2_negative = arg2.value < 0;
  228. uint64_t arg1_value = arg1_negative ? -arg1.value : arg1.value;
  229. uint64_t arg2_value = arg2_negative ? -arg2.value : arg2.value;
  230. uint64_t arg1_int = GET_INTEGER_PART(arg1_value);
  231. uint64_t arg2_int = GET_INTEGER_PART(arg2_value);
  232. uint64_t arg1_fra = GET_FRACTIONAL_PART(arg1_value);
  233. uint64_t arg2_fra = GET_FRACTIONAL_PART(arg2_value);
  234. uint64_t tmp;
  235. res.value = arg1_int * arg2_int;
  236. ASSERT(res.value <= LONG_MAX);
  237. res.value <<= BITS_PER_FRACTIONAL_PART;
  238. tmp = arg1_int * arg2_fra;
  239. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  240. res.value += tmp;
  241. tmp = arg2_int * arg1_fra;
  242. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  243. res.value += tmp;
  244. tmp = arg1_fra * arg2_fra;
  245. tmp = (tmp >> BITS_PER_FRACTIONAL_PART) +
  246. (tmp >= (uint64_t)dal_fixed31_32_half.value);
  247. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  248. res.value += tmp;
  249. if (arg1_negative ^ arg2_negative)
  250. res.value = -res.value;
  251. return res;
  252. }
  253. struct fixed31_32 dal_fixed31_32_sqr(
  254. struct fixed31_32 arg)
  255. {
  256. struct fixed31_32 res;
  257. uint64_t arg_value = abs_i64(arg.value);
  258. uint64_t arg_int = GET_INTEGER_PART(arg_value);
  259. uint64_t arg_fra = GET_FRACTIONAL_PART(arg_value);
  260. uint64_t tmp;
  261. res.value = arg_int * arg_int;
  262. ASSERT(res.value <= LONG_MAX);
  263. res.value <<= BITS_PER_FRACTIONAL_PART;
  264. tmp = arg_int * arg_fra;
  265. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  266. res.value += tmp;
  267. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  268. res.value += tmp;
  269. tmp = arg_fra * arg_fra;
  270. tmp = (tmp >> BITS_PER_FRACTIONAL_PART) +
  271. (tmp >= (uint64_t)dal_fixed31_32_half.value);
  272. ASSERT(tmp <= (uint64_t)(LLONG_MAX - res.value));
  273. res.value += tmp;
  274. return res;
  275. }
  276. struct fixed31_32 dal_fixed31_32_div_int(
  277. struct fixed31_32 arg1,
  278. int64_t arg2)
  279. {
  280. return dal_fixed31_32_from_fraction(
  281. arg1.value,
  282. dal_fixed31_32_from_int(arg2).value);
  283. }
  284. struct fixed31_32 dal_fixed31_32_div(
  285. struct fixed31_32 arg1,
  286. struct fixed31_32 arg2)
  287. {
  288. return dal_fixed31_32_from_fraction(
  289. arg1.value,
  290. arg2.value);
  291. }
  292. struct fixed31_32 dal_fixed31_32_recip(
  293. struct fixed31_32 arg)
  294. {
  295. /*
  296. * @note
  297. * Good idea to use Newton's method
  298. */
  299. ASSERT(arg.value);
  300. return dal_fixed31_32_from_fraction(
  301. dal_fixed31_32_one.value,
  302. arg.value);
  303. }
  304. struct fixed31_32 dal_fixed31_32_sinc(
  305. struct fixed31_32 arg)
  306. {
  307. struct fixed31_32 square;
  308. struct fixed31_32 res = dal_fixed31_32_one;
  309. int32_t n = 27;
  310. struct fixed31_32 arg_norm = arg;
  311. if (dal_fixed31_32_le(
  312. dal_fixed31_32_two_pi,
  313. dal_fixed31_32_abs(arg))) {
  314. arg_norm = dal_fixed31_32_sub(
  315. arg_norm,
  316. dal_fixed31_32_mul_int(
  317. dal_fixed31_32_two_pi,
  318. (int32_t)div64_s64(
  319. arg_norm.value,
  320. dal_fixed31_32_two_pi.value)));
  321. }
  322. square = dal_fixed31_32_sqr(arg_norm);
  323. do {
  324. res = dal_fixed31_32_sub(
  325. dal_fixed31_32_one,
  326. dal_fixed31_32_div_int(
  327. dal_fixed31_32_mul(
  328. square,
  329. res),
  330. n * (n - 1)));
  331. n -= 2;
  332. } while (n > 2);
  333. if (arg.value != arg_norm.value)
  334. res = dal_fixed31_32_div(
  335. dal_fixed31_32_mul(res, arg_norm),
  336. arg);
  337. return res;
  338. }
  339. struct fixed31_32 dal_fixed31_32_sin(
  340. struct fixed31_32 arg)
  341. {
  342. return dal_fixed31_32_mul(
  343. arg,
  344. dal_fixed31_32_sinc(arg));
  345. }
  346. struct fixed31_32 dal_fixed31_32_cos(
  347. struct fixed31_32 arg)
  348. {
  349. /* TODO implement argument normalization */
  350. const struct fixed31_32 square = dal_fixed31_32_sqr(arg);
  351. struct fixed31_32 res = dal_fixed31_32_one;
  352. int32_t n = 26;
  353. do {
  354. res = dal_fixed31_32_sub(
  355. dal_fixed31_32_one,
  356. dal_fixed31_32_div_int(
  357. dal_fixed31_32_mul(
  358. square,
  359. res),
  360. n * (n - 1)));
  361. n -= 2;
  362. } while (n != 0);
  363. return res;
  364. }
  365. /*
  366. * @brief
  367. * result = exp(arg),
  368. * where abs(arg) < 1
  369. *
  370. * Calculated as Taylor series.
  371. */
  372. static struct fixed31_32 fixed31_32_exp_from_taylor_series(
  373. struct fixed31_32 arg)
  374. {
  375. uint32_t n = 9;
  376. struct fixed31_32 res = dal_fixed31_32_from_fraction(
  377. n + 2,
  378. n + 1);
  379. /* TODO find correct res */
  380. ASSERT(dal_fixed31_32_lt(arg, dal_fixed31_32_one));
  381. do
  382. res = dal_fixed31_32_add(
  383. dal_fixed31_32_one,
  384. dal_fixed31_32_div_int(
  385. dal_fixed31_32_mul(
  386. arg,
  387. res),
  388. n));
  389. while (--n != 1);
  390. return dal_fixed31_32_add(
  391. dal_fixed31_32_one,
  392. dal_fixed31_32_mul(
  393. arg,
  394. res));
  395. }
  396. struct fixed31_32 dal_fixed31_32_exp(
  397. struct fixed31_32 arg)
  398. {
  399. /*
  400. * @brief
  401. * Main equation is:
  402. * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r),
  403. * where m = round(x / ln(2)), r = x - m * ln(2)
  404. */
  405. if (dal_fixed31_32_le(
  406. dal_fixed31_32_ln2_div_2,
  407. dal_fixed31_32_abs(arg))) {
  408. int32_t m = dal_fixed31_32_round(
  409. dal_fixed31_32_div(
  410. arg,
  411. dal_fixed31_32_ln2));
  412. struct fixed31_32 r = dal_fixed31_32_sub(
  413. arg,
  414. dal_fixed31_32_mul_int(
  415. dal_fixed31_32_ln2,
  416. m));
  417. ASSERT(m != 0);
  418. ASSERT(dal_fixed31_32_lt(
  419. dal_fixed31_32_abs(r),
  420. dal_fixed31_32_one));
  421. if (m > 0)
  422. return dal_fixed31_32_shl(
  423. fixed31_32_exp_from_taylor_series(r),
  424. (uint8_t)m);
  425. else
  426. return dal_fixed31_32_div_int(
  427. fixed31_32_exp_from_taylor_series(r),
  428. 1LL << -m);
  429. } else if (arg.value != 0)
  430. return fixed31_32_exp_from_taylor_series(arg);
  431. else
  432. return dal_fixed31_32_one;
  433. }
  434. struct fixed31_32 dal_fixed31_32_log(
  435. struct fixed31_32 arg)
  436. {
  437. struct fixed31_32 res = dal_fixed31_32_neg(dal_fixed31_32_one);
  438. /* TODO improve 1st estimation */
  439. struct fixed31_32 error;
  440. ASSERT(arg.value > 0);
  441. /* TODO if arg is negative, return NaN */
  442. /* TODO if arg is zero, return -INF */
  443. do {
  444. struct fixed31_32 res1 = dal_fixed31_32_add(
  445. dal_fixed31_32_sub(
  446. res,
  447. dal_fixed31_32_one),
  448. dal_fixed31_32_div(
  449. arg,
  450. dal_fixed31_32_exp(res)));
  451. error = dal_fixed31_32_sub(
  452. res,
  453. res1);
  454. res = res1;
  455. /* TODO determine max_allowed_error based on quality of exp() */
  456. } while (abs_i64(error.value) > 100ULL);
  457. return res;
  458. }
  459. struct fixed31_32 dal_fixed31_32_pow(
  460. struct fixed31_32 arg1,
  461. struct fixed31_32 arg2)
  462. {
  463. return dal_fixed31_32_exp(
  464. dal_fixed31_32_mul(
  465. dal_fixed31_32_log(arg1),
  466. arg2));
  467. }
  468. int32_t dal_fixed31_32_floor(
  469. struct fixed31_32 arg)
  470. {
  471. uint64_t arg_value = abs_i64(arg.value);
  472. if (arg.value >= 0)
  473. return (int32_t)GET_INTEGER_PART(arg_value);
  474. else
  475. return -(int32_t)GET_INTEGER_PART(arg_value);
  476. }
  477. int32_t dal_fixed31_32_round(
  478. struct fixed31_32 arg)
  479. {
  480. uint64_t arg_value = abs_i64(arg.value);
  481. const int64_t summand = dal_fixed31_32_half.value;
  482. ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
  483. arg_value += summand;
  484. if (arg.value >= 0)
  485. return (int32_t)GET_INTEGER_PART(arg_value);
  486. else
  487. return -(int32_t)GET_INTEGER_PART(arg_value);
  488. }
  489. int32_t dal_fixed31_32_ceil(
  490. struct fixed31_32 arg)
  491. {
  492. uint64_t arg_value = abs_i64(arg.value);
  493. const int64_t summand = dal_fixed31_32_one.value -
  494. dal_fixed31_32_epsilon.value;
  495. ASSERT(LLONG_MAX - (int64_t)arg_value >= summand);
  496. arg_value += summand;
  497. if (arg.value >= 0)
  498. return (int32_t)GET_INTEGER_PART(arg_value);
  499. else
  500. return -(int32_t)GET_INTEGER_PART(arg_value);
  501. }
  502. /* this function is a generic helper to translate fixed point value to
  503. * specified integer format that will consist of integer_bits integer part and
  504. * fractional_bits fractional part. For example it is used in
  505. * dal_fixed31_32_u2d19 to receive 2 bits integer part and 19 bits fractional
  506. * part in 32 bits. It is used in hw programming (scaler)
  507. */
  508. static inline uint32_t ux_dy(
  509. int64_t value,
  510. uint32_t integer_bits,
  511. uint32_t fractional_bits)
  512. {
  513. /* 1. create mask of integer part */
  514. uint32_t result = (1 << integer_bits) - 1;
  515. /* 2. mask out fractional part */
  516. uint32_t fractional_part = FRACTIONAL_PART_MASK & value;
  517. /* 3. shrink fixed point integer part to be of integer_bits width*/
  518. result &= GET_INTEGER_PART(value);
  519. /* 4. make space for fractional part to be filled in after integer */
  520. result <<= fractional_bits;
  521. /* 5. shrink fixed point fractional part to of fractional_bits width*/
  522. fractional_part >>= BITS_PER_FRACTIONAL_PART - fractional_bits;
  523. /* 6. merge the result */
  524. return result | fractional_part;
  525. }
  526. uint32_t dal_fixed31_32_u2d19(
  527. struct fixed31_32 arg)
  528. {
  529. return ux_dy(arg.value, 2, 19);
  530. }
  531. uint32_t dal_fixed31_32_u0d19(
  532. struct fixed31_32 arg)
  533. {
  534. return ux_dy(arg.value, 0, 19);
  535. }