string_helpers.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. /*
  2. * Helpers for formatting and printing strings
  3. *
  4. * Copyright 31 August 2008 James Bottomley
  5. * Copyright (C) 2013, Intel Corporation
  6. */
  7. #include <linux/bug.h>
  8. #include <linux/kernel.h>
  9. #include <linux/math64.h>
  10. #include <linux/export.h>
  11. #include <linux/ctype.h>
  12. #include <linux/errno.h>
  13. #include <linux/mm.h>
  14. #include <linux/slab.h>
  15. #include <linux/string.h>
  16. #include <linux/string_helpers.h>
  17. /**
  18. * string_get_size - get the size in the specified units
  19. * @size: The size to be converted in blocks
  20. * @blk_size: Size of the block (use 1 for size in bytes)
  21. * @units: units to use (powers of 1000 or 1024)
  22. * @buf: buffer to format to
  23. * @len: length of buffer
  24. *
  25. * This function returns a string formatted to 3 significant figures
  26. * giving the size in the required units. @buf should have room for
  27. * at least 9 bytes and will always be zero terminated.
  28. *
  29. */
  30. void string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
  31. char *buf, int len)
  32. {
  33. static const char *const units_10[] = {
  34. "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
  35. };
  36. static const char *const units_2[] = {
  37. "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
  38. };
  39. static const char *const *const units_str[] = {
  40. [STRING_UNITS_10] = units_10,
  41. [STRING_UNITS_2] = units_2,
  42. };
  43. static const unsigned int divisor[] = {
  44. [STRING_UNITS_10] = 1000,
  45. [STRING_UNITS_2] = 1024,
  46. };
  47. static const unsigned int rounding[] = { 500, 50, 5 };
  48. int i = 0, j;
  49. u32 remainder = 0, sf_cap;
  50. char tmp[8];
  51. const char *unit;
  52. tmp[0] = '\0';
  53. if (blk_size == 0)
  54. size = 0;
  55. if (size == 0)
  56. goto out;
  57. /* This is Napier's algorithm. Reduce the original block size to
  58. *
  59. * coefficient * divisor[units]^i
  60. *
  61. * we do the reduction so both coefficients are just under 32 bits so
  62. * that multiplying them together won't overflow 64 bits and we keep
  63. * as much precision as possible in the numbers.
  64. *
  65. * Note: it's safe to throw away the remainders here because all the
  66. * precision is in the coefficients.
  67. */
  68. while (blk_size >> 32) {
  69. do_div(blk_size, divisor[units]);
  70. i++;
  71. }
  72. while (size >> 32) {
  73. do_div(size, divisor[units]);
  74. i++;
  75. }
  76. /* now perform the actual multiplication keeping i as the sum of the
  77. * two logarithms */
  78. size *= blk_size;
  79. /* and logarithmically reduce it until it's just under the divisor */
  80. while (size >= divisor[units]) {
  81. remainder = do_div(size, divisor[units]);
  82. i++;
  83. }
  84. /* work out in j how many digits of precision we need from the
  85. * remainder */
  86. sf_cap = size;
  87. for (j = 0; sf_cap*10 < 1000; j++)
  88. sf_cap *= 10;
  89. if (units == STRING_UNITS_2) {
  90. /* express the remainder as a decimal. It's currently the
  91. * numerator of a fraction whose denominator is
  92. * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
  93. remainder *= 1000;
  94. remainder >>= 10;
  95. }
  96. /* add a 5 to the digit below what will be printed to ensure
  97. * an arithmetical round up and carry it through to size */
  98. remainder += rounding[j];
  99. if (remainder >= 1000) {
  100. remainder -= 1000;
  101. size += 1;
  102. }
  103. if (j) {
  104. snprintf(tmp, sizeof(tmp), ".%03u", remainder);
  105. tmp[j+1] = '\0';
  106. }
  107. out:
  108. if (i >= ARRAY_SIZE(units_2))
  109. unit = "UNK";
  110. else
  111. unit = units_str[units][i];
  112. snprintf(buf, len, "%u%s %s", (u32)size,
  113. tmp, unit);
  114. }
  115. EXPORT_SYMBOL(string_get_size);
  116. static bool unescape_space(char **src, char **dst)
  117. {
  118. char *p = *dst, *q = *src;
  119. switch (*q) {
  120. case 'n':
  121. *p = '\n';
  122. break;
  123. case 'r':
  124. *p = '\r';
  125. break;
  126. case 't':
  127. *p = '\t';
  128. break;
  129. case 'v':
  130. *p = '\v';
  131. break;
  132. case 'f':
  133. *p = '\f';
  134. break;
  135. default:
  136. return false;
  137. }
  138. *dst += 1;
  139. *src += 1;
  140. return true;
  141. }
  142. static bool unescape_octal(char **src, char **dst)
  143. {
  144. char *p = *dst, *q = *src;
  145. u8 num;
  146. if (isodigit(*q) == 0)
  147. return false;
  148. num = (*q++) & 7;
  149. while (num < 32 && isodigit(*q) && (q - *src < 3)) {
  150. num <<= 3;
  151. num += (*q++) & 7;
  152. }
  153. *p = num;
  154. *dst += 1;
  155. *src = q;
  156. return true;
  157. }
  158. static bool unescape_hex(char **src, char **dst)
  159. {
  160. char *p = *dst, *q = *src;
  161. int digit;
  162. u8 num;
  163. if (*q++ != 'x')
  164. return false;
  165. num = digit = hex_to_bin(*q++);
  166. if (digit < 0)
  167. return false;
  168. digit = hex_to_bin(*q);
  169. if (digit >= 0) {
  170. q++;
  171. num = (num << 4) | digit;
  172. }
  173. *p = num;
  174. *dst += 1;
  175. *src = q;
  176. return true;
  177. }
  178. static bool unescape_special(char **src, char **dst)
  179. {
  180. char *p = *dst, *q = *src;
  181. switch (*q) {
  182. case '\"':
  183. *p = '\"';
  184. break;
  185. case '\\':
  186. *p = '\\';
  187. break;
  188. case 'a':
  189. *p = '\a';
  190. break;
  191. case 'e':
  192. *p = '\e';
  193. break;
  194. default:
  195. return false;
  196. }
  197. *dst += 1;
  198. *src += 1;
  199. return true;
  200. }
  201. /**
  202. * string_unescape - unquote characters in the given string
  203. * @src: source buffer (escaped)
  204. * @dst: destination buffer (unescaped)
  205. * @size: size of the destination buffer (0 to unlimit)
  206. * @flags: combination of the flags (bitwise OR):
  207. * %UNESCAPE_SPACE:
  208. * '\f' - form feed
  209. * '\n' - new line
  210. * '\r' - carriage return
  211. * '\t' - horizontal tab
  212. * '\v' - vertical tab
  213. * %UNESCAPE_OCTAL:
  214. * '\NNN' - byte with octal value NNN (1 to 3 digits)
  215. * %UNESCAPE_HEX:
  216. * '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
  217. * %UNESCAPE_SPECIAL:
  218. * '\"' - double quote
  219. * '\\' - backslash
  220. * '\a' - alert (BEL)
  221. * '\e' - escape
  222. * %UNESCAPE_ANY:
  223. * all previous together
  224. *
  225. * Description:
  226. * The function unquotes characters in the given string.
  227. *
  228. * Because the size of the output will be the same as or less than the size of
  229. * the input, the transformation may be performed in place.
  230. *
  231. * Caller must provide valid source and destination pointers. Be aware that
  232. * destination buffer will always be NULL-terminated. Source string must be
  233. * NULL-terminated as well.
  234. *
  235. * Return:
  236. * The amount of the characters processed to the destination buffer excluding
  237. * trailing '\0' is returned.
  238. */
  239. int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
  240. {
  241. char *out = dst;
  242. while (*src && --size) {
  243. if (src[0] == '\\' && src[1] != '\0' && size > 1) {
  244. src++;
  245. size--;
  246. if (flags & UNESCAPE_SPACE &&
  247. unescape_space(&src, &out))
  248. continue;
  249. if (flags & UNESCAPE_OCTAL &&
  250. unescape_octal(&src, &out))
  251. continue;
  252. if (flags & UNESCAPE_HEX &&
  253. unescape_hex(&src, &out))
  254. continue;
  255. if (flags & UNESCAPE_SPECIAL &&
  256. unescape_special(&src, &out))
  257. continue;
  258. *out++ = '\\';
  259. }
  260. *out++ = *src++;
  261. }
  262. *out = '\0';
  263. return out - dst;
  264. }
  265. EXPORT_SYMBOL(string_unescape);
  266. static bool escape_passthrough(unsigned char c, char **dst, char *end)
  267. {
  268. char *out = *dst;
  269. if (out < end)
  270. *out = c;
  271. *dst = out + 1;
  272. return true;
  273. }
  274. static bool escape_space(unsigned char c, char **dst, char *end)
  275. {
  276. char *out = *dst;
  277. unsigned char to;
  278. switch (c) {
  279. case '\n':
  280. to = 'n';
  281. break;
  282. case '\r':
  283. to = 'r';
  284. break;
  285. case '\t':
  286. to = 't';
  287. break;
  288. case '\v':
  289. to = 'v';
  290. break;
  291. case '\f':
  292. to = 'f';
  293. break;
  294. default:
  295. return false;
  296. }
  297. if (out < end)
  298. *out = '\\';
  299. ++out;
  300. if (out < end)
  301. *out = to;
  302. ++out;
  303. *dst = out;
  304. return true;
  305. }
  306. static bool escape_special(unsigned char c, char **dst, char *end)
  307. {
  308. char *out = *dst;
  309. unsigned char to;
  310. switch (c) {
  311. case '\\':
  312. to = '\\';
  313. break;
  314. case '\a':
  315. to = 'a';
  316. break;
  317. case '\e':
  318. to = 'e';
  319. break;
  320. default:
  321. return false;
  322. }
  323. if (out < end)
  324. *out = '\\';
  325. ++out;
  326. if (out < end)
  327. *out = to;
  328. ++out;
  329. *dst = out;
  330. return true;
  331. }
  332. static bool escape_null(unsigned char c, char **dst, char *end)
  333. {
  334. char *out = *dst;
  335. if (c)
  336. return false;
  337. if (out < end)
  338. *out = '\\';
  339. ++out;
  340. if (out < end)
  341. *out = '0';
  342. ++out;
  343. *dst = out;
  344. return true;
  345. }
  346. static bool escape_octal(unsigned char c, char **dst, char *end)
  347. {
  348. char *out = *dst;
  349. if (out < end)
  350. *out = '\\';
  351. ++out;
  352. if (out < end)
  353. *out = ((c >> 6) & 0x07) + '0';
  354. ++out;
  355. if (out < end)
  356. *out = ((c >> 3) & 0x07) + '0';
  357. ++out;
  358. if (out < end)
  359. *out = ((c >> 0) & 0x07) + '0';
  360. ++out;
  361. *dst = out;
  362. return true;
  363. }
  364. static bool escape_hex(unsigned char c, char **dst, char *end)
  365. {
  366. char *out = *dst;
  367. if (out < end)
  368. *out = '\\';
  369. ++out;
  370. if (out < end)
  371. *out = 'x';
  372. ++out;
  373. if (out < end)
  374. *out = hex_asc_hi(c);
  375. ++out;
  376. if (out < end)
  377. *out = hex_asc_lo(c);
  378. ++out;
  379. *dst = out;
  380. return true;
  381. }
  382. /**
  383. * string_escape_mem - quote characters in the given memory buffer
  384. * @src: source buffer (unescaped)
  385. * @isz: source buffer size
  386. * @dst: destination buffer (escaped)
  387. * @osz: destination buffer size
  388. * @flags: combination of the flags (bitwise OR):
  389. * %ESCAPE_SPACE: (special white space, not space itself)
  390. * '\f' - form feed
  391. * '\n' - new line
  392. * '\r' - carriage return
  393. * '\t' - horizontal tab
  394. * '\v' - vertical tab
  395. * %ESCAPE_SPECIAL:
  396. * '\\' - backslash
  397. * '\a' - alert (BEL)
  398. * '\e' - escape
  399. * %ESCAPE_NULL:
  400. * '\0' - null
  401. * %ESCAPE_OCTAL:
  402. * '\NNN' - byte with octal value NNN (3 digits)
  403. * %ESCAPE_ANY:
  404. * all previous together
  405. * %ESCAPE_NP:
  406. * escape only non-printable characters (checked by isprint)
  407. * %ESCAPE_ANY_NP:
  408. * all previous together
  409. * %ESCAPE_HEX:
  410. * '\xHH' - byte with hexadecimal value HH (2 digits)
  411. * @only: NULL-terminated string containing characters used to limit
  412. * the selected escape class. If characters are included in @only
  413. * that would not normally be escaped by the classes selected
  414. * in @flags, they will be copied to @dst unescaped.
  415. *
  416. * Description:
  417. * The process of escaping byte buffer includes several parts. They are applied
  418. * in the following sequence.
  419. * 1. The character is matched to the printable class, if asked, and in
  420. * case of match it passes through to the output.
  421. * 2. The character is not matched to the one from @only string and thus
  422. * must go as-is to the output.
  423. * 3. The character is checked if it falls into the class given by @flags.
  424. * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
  425. * character. Note that they actually can't go together, otherwise
  426. * %ESCAPE_HEX will be ignored.
  427. *
  428. * Caller must provide valid source and destination pointers. Be aware that
  429. * destination buffer will not be NULL-terminated, thus caller have to append
  430. * it if needs.
  431. *
  432. * Return:
  433. * The total size of the escaped output that would be generated for
  434. * the given input and flags. To check whether the output was
  435. * truncated, compare the return value to osz. There is room left in
  436. * dst for a '\0' terminator if and only if ret < osz.
  437. */
  438. int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
  439. unsigned int flags, const char *only)
  440. {
  441. char *p = dst;
  442. char *end = p + osz;
  443. bool is_dict = only && *only;
  444. while (isz--) {
  445. unsigned char c = *src++;
  446. /*
  447. * Apply rules in the following sequence:
  448. * - the character is printable, when @flags has
  449. * %ESCAPE_NP bit set
  450. * - the @only string is supplied and does not contain a
  451. * character under question
  452. * - the character doesn't fall into a class of symbols
  453. * defined by given @flags
  454. * In these cases we just pass through a character to the
  455. * output buffer.
  456. */
  457. if ((flags & ESCAPE_NP && isprint(c)) ||
  458. (is_dict && !strchr(only, c))) {
  459. /* do nothing */
  460. } else {
  461. if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
  462. continue;
  463. if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
  464. continue;
  465. if (flags & ESCAPE_NULL && escape_null(c, &p, end))
  466. continue;
  467. /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
  468. if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
  469. continue;
  470. if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
  471. continue;
  472. }
  473. escape_passthrough(c, &p, end);
  474. }
  475. return p - dst;
  476. }
  477. EXPORT_SYMBOL(string_escape_mem);
  478. /*
  479. * Return an allocated string that has been escaped of special characters
  480. * and double quotes, making it safe to log in quotes.
  481. */
  482. char *kstrdup_quotable(const char *src, gfp_t gfp)
  483. {
  484. size_t slen, dlen;
  485. char *dst;
  486. const int flags = ESCAPE_HEX;
  487. const char esc[] = "\f\n\r\t\v\a\e\\\"";
  488. if (!src)
  489. return NULL;
  490. slen = strlen(src);
  491. dlen = string_escape_mem(src, slen, NULL, 0, flags, esc);
  492. dst = kmalloc(dlen + 1, gfp);
  493. if (!dst)
  494. return NULL;
  495. WARN_ON(string_escape_mem(src, slen, dst, dlen, flags, esc) != dlen);
  496. dst[dlen] = '\0';
  497. return dst;
  498. }
  499. EXPORT_SYMBOL_GPL(kstrdup_quotable);
  500. /*
  501. * Returns allocated NULL-terminated string containing process
  502. * command line, with inter-argument NULLs replaced with spaces,
  503. * and other special characters escaped.
  504. */
  505. char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp)
  506. {
  507. char *buffer, *quoted;
  508. int i, res;
  509. buffer = kmalloc(PAGE_SIZE, GFP_TEMPORARY);
  510. if (!buffer)
  511. return NULL;
  512. res = get_cmdline(task, buffer, PAGE_SIZE - 1);
  513. buffer[res] = '\0';
  514. /* Collapse trailing NULLs, leave res pointing to last non-NULL. */
  515. while (--res >= 0 && buffer[res] == '\0')
  516. ;
  517. /* Replace inter-argument NULLs. */
  518. for (i = 0; i <= res; i++)
  519. if (buffer[i] == '\0')
  520. buffer[i] = ' ';
  521. /* Make sure result is printable. */
  522. quoted = kstrdup_quotable(buffer, gfp);
  523. kfree(buffer);
  524. return quoted;
  525. }
  526. EXPORT_SYMBOL_GPL(kstrdup_quotable_cmdline);