string_helpers.c 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
/*
 * Helpers for formatting and printing strings
 *
 * Copyright 31 August 2008 James Bottomley
 * Copyright (C) 2013, Intel Corporation
 */
  7. #include <linux/kernel.h>
  8. #include <linux/math64.h>
  9. #include <linux/export.h>
  10. #include <linux/ctype.h>
  11. #include <linux/errno.h>
  12. #include <linux/string.h>
  13. #include <linux/string_helpers.h>
  14. /**
  15. * string_get_size - get the size in the specified units
  16. * @size: The size to be converted
  17. * @units: units to use (powers of 1000 or 1024)
  18. * @buf: buffer to format to
  19. * @len: length of buffer
  20. *
  21. * This function returns a string formatted to 3 significant figures
  22. * giving the size in the required units. @buf should have room for
  23. * at least 9 bytes and will always be zero terminated.
  24. *
  25. */
  26. void string_get_size(u64 size, const enum string_size_units units,
  27. char *buf, int len)
  28. {
  29. static const char *const units_10[] = {
  30. "B", "kB", "MB", "GB", "TB", "PB", "EB"
  31. };
  32. static const char *const units_2[] = {
  33. "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"
  34. };
  35. static const char *const *const units_str[] = {
  36. [STRING_UNITS_10] = units_10,
  37. [STRING_UNITS_2] = units_2,
  38. };
  39. static const unsigned int divisor[] = {
  40. [STRING_UNITS_10] = 1000,
  41. [STRING_UNITS_2] = 1024,
  42. };
  43. int i, j;
  44. u32 remainder = 0, sf_cap;
  45. char tmp[8];
  46. tmp[0] = '\0';
  47. i = 0;
  48. if (size >= divisor[units]) {
  49. while (size >= divisor[units]) {
  50. remainder = do_div(size, divisor[units]);
  51. i++;
  52. }
  53. sf_cap = size;
  54. for (j = 0; sf_cap*10 < 1000; j++)
  55. sf_cap *= 10;
  56. if (j) {
  57. remainder *= 1000;
  58. remainder /= divisor[units];
  59. snprintf(tmp, sizeof(tmp), ".%03u", remainder);
  60. tmp[j+1] = '\0';
  61. }
  62. }
  63. snprintf(buf, len, "%u%s %s", (u32)size,
  64. tmp, units_str[units][i]);
  65. }
  66. EXPORT_SYMBOL(string_get_size);
  67. static bool unescape_space(char **src, char **dst)
  68. {
  69. char *p = *dst, *q = *src;
  70. switch (*q) {
  71. case 'n':
  72. *p = '\n';
  73. break;
  74. case 'r':
  75. *p = '\r';
  76. break;
  77. case 't':
  78. *p = '\t';
  79. break;
  80. case 'v':
  81. *p = '\v';
  82. break;
  83. case 'f':
  84. *p = '\f';
  85. break;
  86. default:
  87. return false;
  88. }
  89. *dst += 1;
  90. *src += 1;
  91. return true;
  92. }
  93. static bool unescape_octal(char **src, char **dst)
  94. {
  95. char *p = *dst, *q = *src;
  96. u8 num;
  97. if (isodigit(*q) == 0)
  98. return false;
  99. num = (*q++) & 7;
  100. while (num < 32 && isodigit(*q) && (q - *src < 3)) {
  101. num <<= 3;
  102. num += (*q++) & 7;
  103. }
  104. *p = num;
  105. *dst += 1;
  106. *src = q;
  107. return true;
  108. }
  109. static bool unescape_hex(char **src, char **dst)
  110. {
  111. char *p = *dst, *q = *src;
  112. int digit;
  113. u8 num;
  114. if (*q++ != 'x')
  115. return false;
  116. num = digit = hex_to_bin(*q++);
  117. if (digit < 0)
  118. return false;
  119. digit = hex_to_bin(*q);
  120. if (digit >= 0) {
  121. q++;
  122. num = (num << 4) | digit;
  123. }
  124. *p = num;
  125. *dst += 1;
  126. *src = q;
  127. return true;
  128. }
  129. static bool unescape_special(char **src, char **dst)
  130. {
  131. char *p = *dst, *q = *src;
  132. switch (*q) {
  133. case '\"':
  134. *p = '\"';
  135. break;
  136. case '\\':
  137. *p = '\\';
  138. break;
  139. case 'a':
  140. *p = '\a';
  141. break;
  142. case 'e':
  143. *p = '\e';
  144. break;
  145. default:
  146. return false;
  147. }
  148. *dst += 1;
  149. *src += 1;
  150. return true;
  151. }
  152. /**
  153. * string_unescape - unquote characters in the given string
  154. * @src: source buffer (escaped)
  155. * @dst: destination buffer (unescaped)
  156. * @size: size of the destination buffer (0 to unlimit)
  157. * @flags: combination of the flags (bitwise OR):
  158. * %UNESCAPE_SPACE:
  159. * '\f' - form feed
  160. * '\n' - new line
  161. * '\r' - carriage return
  162. * '\t' - horizontal tab
  163. * '\v' - vertical tab
  164. * %UNESCAPE_OCTAL:
  165. * '\NNN' - byte with octal value NNN (1 to 3 digits)
  166. * %UNESCAPE_HEX:
  167. * '\xHH' - byte with hexadecimal value HH (1 to 2 digits)
  168. * %UNESCAPE_SPECIAL:
  169. * '\"' - double quote
  170. * '\\' - backslash
  171. * '\a' - alert (BEL)
  172. * '\e' - escape
  173. * %UNESCAPE_ANY:
  174. * all previous together
  175. *
  176. * Description:
  177. * The function unquotes characters in the given string.
  178. *
  179. * Because the size of the output will be the same as or less than the size of
  180. * the input, the transformation may be performed in place.
  181. *
  182. * Caller must provide valid source and destination pointers. Be aware that
  183. * destination buffer will always be NULL-terminated. Source string must be
  184. * NULL-terminated as well.
  185. *
  186. * Return:
  187. * The amount of the characters processed to the destination buffer excluding
  188. * trailing '\0' is returned.
  189. */
  190. int string_unescape(char *src, char *dst, size_t size, unsigned int flags)
  191. {
  192. char *out = dst;
  193. while (*src && --size) {
  194. if (src[0] == '\\' && src[1] != '\0' && size > 1) {
  195. src++;
  196. size--;
  197. if (flags & UNESCAPE_SPACE &&
  198. unescape_space(&src, &out))
  199. continue;
  200. if (flags & UNESCAPE_OCTAL &&
  201. unescape_octal(&src, &out))
  202. continue;
  203. if (flags & UNESCAPE_HEX &&
  204. unescape_hex(&src, &out))
  205. continue;
  206. if (flags & UNESCAPE_SPECIAL &&
  207. unescape_special(&src, &out))
  208. continue;
  209. *out++ = '\\';
  210. }
  211. *out++ = *src++;
  212. }
  213. *out = '\0';
  214. return out - dst;
  215. }
  216. EXPORT_SYMBOL(string_unescape);
  217. static bool escape_passthrough(unsigned char c, char **dst, char *end)
  218. {
  219. char *out = *dst;
  220. if (out < end)
  221. *out = c;
  222. *dst = out + 1;
  223. return true;
  224. }
  225. static bool escape_space(unsigned char c, char **dst, char *end)
  226. {
  227. char *out = *dst;
  228. unsigned char to;
  229. switch (c) {
  230. case '\n':
  231. to = 'n';
  232. break;
  233. case '\r':
  234. to = 'r';
  235. break;
  236. case '\t':
  237. to = 't';
  238. break;
  239. case '\v':
  240. to = 'v';
  241. break;
  242. case '\f':
  243. to = 'f';
  244. break;
  245. default:
  246. return false;
  247. }
  248. if (out < end)
  249. *out = '\\';
  250. ++out;
  251. if (out < end)
  252. *out = to;
  253. ++out;
  254. *dst = out;
  255. return true;
  256. }
  257. static bool escape_special(unsigned char c, char **dst, char *end)
  258. {
  259. char *out = *dst;
  260. unsigned char to;
  261. switch (c) {
  262. case '\\':
  263. to = '\\';
  264. break;
  265. case '\a':
  266. to = 'a';
  267. break;
  268. case '\e':
  269. to = 'e';
  270. break;
  271. default:
  272. return false;
  273. }
  274. if (out < end)
  275. *out = '\\';
  276. ++out;
  277. if (out < end)
  278. *out = to;
  279. ++out;
  280. *dst = out;
  281. return true;
  282. }
  283. static bool escape_null(unsigned char c, char **dst, char *end)
  284. {
  285. char *out = *dst;
  286. if (c)
  287. return false;
  288. if (out < end)
  289. *out = '\\';
  290. ++out;
  291. if (out < end)
  292. *out = '0';
  293. ++out;
  294. *dst = out;
  295. return true;
  296. }
  297. static bool escape_octal(unsigned char c, char **dst, char *end)
  298. {
  299. char *out = *dst;
  300. if (out < end)
  301. *out = '\\';
  302. ++out;
  303. if (out < end)
  304. *out = ((c >> 6) & 0x07) + '0';
  305. ++out;
  306. if (out < end)
  307. *out = ((c >> 3) & 0x07) + '0';
  308. ++out;
  309. if (out < end)
  310. *out = ((c >> 0) & 0x07) + '0';
  311. ++out;
  312. *dst = out;
  313. return true;
  314. }
  315. static bool escape_hex(unsigned char c, char **dst, char *end)
  316. {
  317. char *out = *dst;
  318. if (out < end)
  319. *out = '\\';
  320. ++out;
  321. if (out < end)
  322. *out = 'x';
  323. ++out;
  324. if (out < end)
  325. *out = hex_asc_hi(c);
  326. ++out;
  327. if (out < end)
  328. *out = hex_asc_lo(c);
  329. ++out;
  330. *dst = out;
  331. return true;
  332. }
  333. /**
  334. * string_escape_mem - quote characters in the given memory buffer
  335. * @src: source buffer (unescaped)
  336. * @isz: source buffer size
  337. * @dst: destination buffer (escaped)
  338. * @osz: destination buffer size
  339. * @flags: combination of the flags (bitwise OR):
  340. * %ESCAPE_SPACE:
  341. * '\f' - form feed
  342. * '\n' - new line
  343. * '\r' - carriage return
  344. * '\t' - horizontal tab
  345. * '\v' - vertical tab
  346. * %ESCAPE_SPECIAL:
  347. * '\\' - backslash
  348. * '\a' - alert (BEL)
  349. * '\e' - escape
  350. * %ESCAPE_NULL:
  351. * '\0' - null
  352. * %ESCAPE_OCTAL:
  353. * '\NNN' - byte with octal value NNN (3 digits)
  354. * %ESCAPE_ANY:
  355. * all previous together
  356. * %ESCAPE_NP:
  357. * escape only non-printable characters (checked by isprint)
  358. * %ESCAPE_ANY_NP:
  359. * all previous together
  360. * %ESCAPE_HEX:
  361. * '\xHH' - byte with hexadecimal value HH (2 digits)
  362. * @esc: NULL-terminated string of characters any of which, if found in
  363. * the source, has to be escaped
  364. *
  365. * Description:
  366. * The process of escaping byte buffer includes several parts. They are applied
  367. * in the following sequence.
  368. * 1. The character is matched to the printable class, if asked, and in
  369. * case of match it passes through to the output.
  370. * 2. The character is not matched to the one from @esc string and thus
  371. * must go as is to the output.
  372. * 3. The character is checked if it falls into the class given by @flags.
  373. * %ESCAPE_OCTAL and %ESCAPE_HEX are going last since they cover any
  374. * character. Note that they actually can't go together, otherwise
  375. * %ESCAPE_HEX will be ignored.
  376. *
  377. * Caller must provide valid source and destination pointers. Be aware that
  378. * destination buffer will not be NULL-terminated, thus caller have to append
  379. * it if needs.
  380. *
  381. * Return:
  382. * The total size of the escaped output that would be generated for
  383. * the given input and flags. To check whether the output was
  384. * truncated, compare the return value to osz. There is room left in
  385. * dst for a '\0' terminator if and only if ret < osz.
  386. */
  387. int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz,
  388. unsigned int flags, const char *esc)
  389. {
  390. char *p = dst;
  391. char *end = p + osz;
  392. bool is_dict = esc && *esc;
  393. while (isz--) {
  394. unsigned char c = *src++;
  395. /*
  396. * Apply rules in the following sequence:
  397. * - the character is printable, when @flags has
  398. * %ESCAPE_NP bit set
  399. * - the @esc string is supplied and does not contain a
  400. * character under question
  401. * - the character doesn't fall into a class of symbols
  402. * defined by given @flags
  403. * In these cases we just pass through a character to the
  404. * output buffer.
  405. */
  406. if ((flags & ESCAPE_NP && isprint(c)) ||
  407. (is_dict && !strchr(esc, c))) {
  408. /* do nothing */
  409. } else {
  410. if (flags & ESCAPE_SPACE && escape_space(c, &p, end))
  411. continue;
  412. if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end))
  413. continue;
  414. if (flags & ESCAPE_NULL && escape_null(c, &p, end))
  415. continue;
  416. /* ESCAPE_OCTAL and ESCAPE_HEX always go last */
  417. if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end))
  418. continue;
  419. if (flags & ESCAPE_HEX && escape_hex(c, &p, end))
  420. continue;
  421. }
  422. escape_passthrough(c, &p, end);
  423. }
  424. return p - dst;
  425. }
  426. EXPORT_SYMBOL(string_escape_mem);