string_helpers.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536
  1. /*
  2. * Helpers for formatting and printing strings
  3. *
  4. * Copyright 31 August 2008 James Bottomley
  5. * Copyright (C) 2013, Intel Corporation
  6. */
  7. #include <linux/bug.h>
  8. #include <linux/kernel.h>
  9. #include <linux/math64.h>
  10. #include <linux/export.h>
  11. #include <linux/ctype.h>
  12. #include <linux/errno.h>
  13. #include <linux/string.h>
  14. #include <linux/string_helpers.h>
  15. /**
  16. * string_get_size - get the size in the specified units
  17. * @size: The size to be converted in blocks
  18. * @blk_size: Size of the block (use 1 for size in bytes)
  19. * @units: units to use (powers of 1000 or 1024)
  20. * @buf: buffer to format to
  21. * @len: length of buffer
  22. *
  23. * This function returns a string formatted to 3 significant figures
  24. * giving the size in the required units. @buf should have room for
  25. * at least 9 bytes and will always be zero terminated.
  26. *
  27. */
  28. void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
  29. char *buf, int len)
  30. {
  31. static const char *const units_10[] = {
  32. "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
  33. };
  34. static const char *const units_2[] = {
  35. "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
  36. };
  37. static const char *const *const units_str[] = {
  38. [STRING_UNITS_10] = units_10,
  39. [STRING_UNITS_2] = units_2,
  40. };
  41. static const unsigned int divisor[] = {
  42. [STRING_UNITS_10] = 1000,
  43. [STRING_UNITS_2] = 1024,
  44. };
  45. static const unsigned int rounding[] = { 500, 50, 5 };
  46. int i = 0, j;
  47. u32 remainder = 0, sf_cap;
  48. char tmp[8];
  49. const char *unit;
  50. tmp[0] = '\0';
  51. if (blk_size == 0)
  52. size = 0;
  53. if (size == 0)
  54. goto out;
  55. /* This is Napier's algorithm. Reduce the original block size to
  56. *
  57. * coefficient * divisor[units]^i
  58. *
  59. * we do the reduction so both coefficients are just under 32 bits so
  60. * that multiplying them together won't overflow 64 bits and we keep
  61. * as much precision as possible in the numbers.
  62. *
  63. * Note: it's safe to throw away the remainders here because all the
  64. * precision is in the coefficients.
  65. */
  66. while (blk_size >> 32) {
  67. do_div(blk_size, divisor[units]);
  68. i++;
  69. }
  70. while (size >> 32) {
  71. do_div(size, divisor[units]);
  72. i++;
  73. }
  74. /* now perform the actual multiplication keeping i as the sum of the
  75. * two logarithms */
  76. size *= blk_size;
  77. /* and logarithmically reduce it until it's just under the divisor */
  78. while (size >= divisor[units]) {
  79. remainder = do_div(size, divisor[units]);
  80. i++;
  81. }
  82. /* work out in j how many digits of precision we need from the
  83. * remainder */
  84. sf_cap = size;
  85. for (j = 0; sf_cap*10 < 1000; j++)
  86. sf_cap *= 10;
  87. if (units == STRING_UNITS_2) {
  88. /* express the remainder as a decimal. It's currently the
  89. * numerator of a fraction whose denominator is
  90. * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
  91. remainder *= 1000;
  92. remainder >>= 10;
  93. }
  94. /* add a 5 to the digit below what will be printed to ensure
  95. * an arithmetical round up and carry it through to size */
  96. remainder += rounding[j];
  97. if (remainder >= 1000) {
  98. remainder -= 1000;
  99. size += 1;
  100. }
  101. if (j) {
  102. snprintf(tmp, sizeof(tmp), ".%03u", remainder);
  103. tmp[j+1] = '\0';
  104. }
  105. out:
  106. if (i >= ARRAY_SIZE(units_2))
  107. unit = "UNK";
  108. else
  109. unit = units_str[units][i];
  110. snprintf(buf, len, "%u%s %s", (u32)size,
  111. tmp, unit);
  112. }
  113. EXPORT_SYMBOL(string_get_size);
  114. static bool unescape_space(char **src, char **dst)
  115. {
  116. char *p = *dst, *q = *src;
  117. switch (*q) {
  118. case 'n':
  119. *p = '\n';
  120. break;
  121. case 'r':
  122. *p = '\r';
  123. break;
  124. case 't':
  125. *p = '\t';
  126. break;
  127. case 'v':
  128. *p = '\v';
  129. break;
  130. case 'f':
  131. *p = '\f';
  132. break;
  133. default:
  134. return false;
  135. }
  136. *dst += 1;
  137. *src += 1;
  138. return true;
  139. }
  140. static bool unescape_octal(char **src, char **dst)
  141. {
  142. char *p = *dst, *q = *src;
  143. u8 num;
  144. if (isodigit(*q) == 0)
  145. return false;
  146. num = (*q++) & 7;
  147. while (num < 32 && isodigit(*q) && (q - *src < 3)) {
  148. num <<= 3;
  149. num += (*q++) & 7;
  150. }
  151. *p = num;
  152. *dst += 1;
  153. *src = q;
  154. return true;
  155. }
  156. static bool unescape_hex(char **src, char **dst)
  157. {
  158. char *p = *dst, *q = *src;
  159. int digit;
  160. u8 num;
  161. if (*q++ != 'x')
  162. return false;
  163. num = digit = hex_to_bin(*q++);
  164. if (digit < 0)
  165. return false;
  166. digit = hex_to_bin(*q);
  167. if (digit >= 0) {
  168. q++;
  169. num = (num << 4) | digit;
  170. }
  171. *p = num;
  172. *dst += 1;
  173. *src = q;
  174. return true;
  175. }
  176. static bool unescape_special(char **src, char **dst)
  177. {
  178. char *p = *dst, *q = *src;
  179. switch (*q) {
  180. case '\"':
  181. *p = '\"';
  182. break;
  183. case '\\':
  184. *p = '\\';
  185. break;
  186. case 'a':
  187. *p = '\a';
  188. break;
  189. case 'e':
  190. *p = '\e';
  191. break;
  192. default:
  193. return false;
  194. }
  195. *dst += 1;
  196. *src += 1;
  197. return true;
  198. }
  199. /**
  200. * string_unescape - unquote characters in the given string
  201. * @src: source buffer (escaped)
  202. * @dst: destination buffer (unescaped)
  203. * @size: size of the destination buffer (0 to unlimit)
  204. * @flags: combination of the flags (bitwise OR):
  205. * %UNESCAPE_SPACE:
  206. * '\f' - form feed
  207. * '\n' - new line
  208. * '\r' - carriage return
  209. * '\t' - horizontal tab
  210. * '\v' - vertical tab
  211. * %UNESCAPE_OCTAL:
  212. * '\NNN' - byte with octal value NNN (1 to 3 digits)
  213. * %UNESCAPE_HEX:
  214. * '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
  215. * %UNESCAPE_SPECIAL:
  216. * '\"' - double quote
  217. * '\\' - backslash
  218. * '\a' - alert (BEL)
  219. * '\e' - escape
  220. * %UNESCAPE_ANY:
  221. * all previous together
  222. *
  223. * Description:
  224. * The function unquotes characters in the given string.
  225. *
  226. * Because the size of the output will be the same as or less than the size of
  227. * the input, the transformation may be performed in place.
  228. *
  229. * Caller must provide valid source and destination pointers. Be aware that
  230. * destination buffer will always be NULL-terminated. Source string must be
  231. * NULL-terminated as well.
  232. *
  233. * Return:
  234. * The amount of the characters processed to the destination buffer excluding
  235. * trailing '\0' is returned.
  236. */
  237. int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
  238. {
  239. char *out = dst;
  240. while (*src && --size) {
  241. if (src[0] == '\\' && src[1] != '\0' && size > 1) {
  242. src++;
  243. size--;
  244. if (flags & UNESCAPE_SPACE &&
  245. unescape_space(&src, &out))
  246. continue;
  247. if (flags & UNESCAPE_OCTAL &&
  248. unescape_octal(&src, &out))
  249. continue;
  250. if (flags & UNESCAPE_HEX &&
  251. unescape_hex(&src, &out))
  252. continue;
  253. if (flags & UNESCAPE_SPECIAL &&
  254. unescape_special(&src, &out))
  255. continue;
  256. *out++ = '\\';
  257. }
  258. *out++ = *src++;
  259. }
  260. *out = '\0';
  261. return out - dst;
  262. }
  263. EXPORT_SYMBOL(string_unescape);
  264. static bool escape_passthrough(unsigned char c, char **dst, char *end)
  265. {
  266. char *out = *dst;
  267. if (out < end)
  268. *out = c;
  269. *dst = out + 1;
  270. return true;
  271. }
  272. static bool escape_space(unsigned char c, char **dst, char *end)
  273. {
  274. char *out = *dst;
  275. unsigned char to;
  276. switch (c) {
  277. case '\n':
  278. to = 'n';
  279. break;
  280. case '\r':
  281. to = 'r';
  282. break;
  283. case '\t':
  284. to = 't';
  285. break;
  286. case '\v':
  287. to = 'v';
  288. break;
  289. case '\f':
  290. to = 'f';
  291. break;
  292. default:
  293. return false;
  294. }
  295. if (out < end)
  296. *out = '\\';
  297. ++out;
  298. if (out < end)
  299. *out = to;
  300. ++out;
  301. *dst = out;
  302. return true;
  303. }
  304. static bool escape_special(unsigned char c, char **dst, char *end)
  305. {
  306. char *out = *dst;
  307. unsigned char to;
  308. switch (c) {
  309. case '\\':
  310. to = '\\';
  311. break;
  312. case '\a':
  313. to = 'a';
  314. break;
  315. case '\e':
  316. to = 'e';
  317. break;
  318. default:
  319. return false;
  320. }
  321. if (out < end)
  322. *out = '\\';
  323. ++out;
  324. if (out < end)
  325. *out = to;
  326. ++out;
  327. *dst = out;
  328. return true;
  329. }
  330. static bool escape_null(unsigned char c, char **dst, char *end)
  331. {
  332. char *out = *dst;
  333. if (c)
  334. return false;
  335. if (out < end)
  336. *out = '\\';
  337. ++out;
  338. if (out < end)
  339. *out = '0';
  340. ++out;
  341. *dst = out;
  342. return true;
  343. }
  344. static bool escape_octal(unsigned char c, char **dst, char *end)
  345. {
  346. char *out = *dst;
  347. if (out < end)
  348. *out = '\\';
  349. ++out;
  350. if (out < end)
  351. *out = ((c >> 6) & 0x07) + '0';
  352. ++out;
  353. if (out < end)
  354. *out = ((c >> 3) & 0x07) + '0';
  355. ++out;
  356. if (out < end)
  357. *out = ((c >> 0) & 0x07) + '0';
  358. ++out;
  359. *dst = out;
  360. return true;
  361. }
  362. static bool escape_hex(unsigned char c, char **dst, char *end)
  363. {
  364. char *out = *dst;
  365. if (out < end)
  366. *out = '\\';
  367. ++out;
  368. if (out < end)
  369. *out = 'x';
  370. ++out;
  371. if (out < end)
  372. *out = hex_asc_hi(c);
  373. ++out;
  374. if (out < end)
  375. *out = hex_asc_lo(c);
  376. ++out;
  377. *dst = out;
  378. return true;
  379. }
  380. /**
  381. * string_escape_mem - quote characters in the given memory buffer
  382. * @src: source buffer (unescaped)
  383. * @isz: source buffer size
  384. * @dst: destination buffer (escaped)
  385. * @osz: destination buffer size
  386. * @flags: combination of the flags (bitwise OR):
  387. * %ESCAPE_SPACE: (special white space, not space itself)
  388. * '\f' - form feed
  389. * '\n' - new line
  390. * '\r' - carriage return
  391. * '\t' - horizontal tab
  392. * '\v' - vertical tab
  393. * %ESCAPE_SPECIAL:
  394. * '\\' - backslash
  395. * '\a' - alert (BEL)
  396. * '\e' - escape
  397. * %ESCAPE_NULL:
  398. * '\0' - null
  399. * %ESCAPE_OCTAL:
  400. * '\NNN' - byte with octal value NNN (3 digits)
  401. * %ESCAPE_ANY:
  402. * all previous together
  403. * %ESCAPE_NP:
  404. * escape only non-printable characters (checked by isprint)
  405. * %ESCAPE_ANY_NP:
  406. * all previous together
  407. * %ESCAPE_HEX:
  408. * '\xHH' - byte with hexadecimal value HH (2 digits)
  409. * @only: NULL-terminated string containing characters used to limit
  410. * the selected escape class. If characters are included in @only
  411. * that would not normally be escaped by the classes selected
  412. * in @flags, they will be copied to @dst unescaped.
  413. *
  414. * Description:
  415. * The process of escaping byte buffer includes several parts. They are applied
  416. * in the following sequence.
  417. * 1. The character is matched to the printable class, if asked, and in
  418. * case of match it passes through to the output.
  419. * 2. The character is not matched to the one from @only string and thus
  420. * must go as-is to the output.
  421. * 3. The character is checked if it falls into the class given by @flags.
  422. * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
  423. * character. Note that they actually can't go together, otherwise
  424. * %ESCAPE_HEX will be ignored.
  425. *
  426. * Caller must provide valid source and destination pointers. Be aware that
  427. * destination buffer will not be NULL-terminated, thus caller have to append
  428. * it if needs.
  429. *
  430. * Return:
  431. * The total size of the escaped output that would be generated for
  432. * the given input and flags. To check whether the output was
  433. * truncated, compare the return value to osz. There is room left in
  434. * dst for a '\0' terminator if and only if ret < osz.
  435. */
  436. int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
  437. unsigned int flags, const char *only)
  438. {
  439. char *p = dst;
  440. char *end = p + osz;
  441. bool is_dict = only && *only;
  442. while (isz--) {
  443. unsigned char c = *src++;
  444. /*
  445. * Apply rules in the following sequence:
  446. * - the character is printable, when @flags has
  447. * %ESCAPE_NP bit set
  448. * - the @only string is supplied and does not contain a
  449. * character under question
  450. * - the character doesn't fall into a class of symbols
  451. * defined by given @flags
  452. * In these cases we just pass through a character to the
  453. * output buffer.
  454. */
  455. if ((flags & ESCAPE_NP && isprint(c)) ||
  456. (is_dict && !strchr(only, c))) {
  457. /* do nothing */
  458. } else {
  459. if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
  460. continue;
  461. if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
  462. continue;
  463. if (flags & ESCAPE_NULL && escape_null(c, &p, end))
  464. continue;
  465. /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
  466. if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
  467. continue;
  468. if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
  469. continue;
  470. }
  471. escape_passthrough(c, &p, end);
  472. }
  473. return p - dst;
  474. }
  475. EXPORT_SYMBOL(string_escape_mem);