xattr.c 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191
  1. // SPDX-License-Identifier: GPL-2.0
  2. #include <linux/ceph/ceph_debug.h>
  3. #include <linux/ceph/pagelist.h>
  4. #include "super.h"
  5. #include "mds_client.h"
  6. #include <linux/ceph/decode.h>
  7. #include <linux/xattr.h>
  8. #include <linux/posix_acl_xattr.h>
  9. #include <linux/slab.h>
  10. #define XATTR_CEPH_PREFIX "ceph."
  11. #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  12. static int __remove_xattr(struct ceph_inode_info *ci,
  13. struct ceph_inode_xattr *xattr);
  14. static const struct xattr_handler ceph_other_xattr_handler;
  15. /*
  16. * List of handlers for synthetic system.* attributes. Other
  17. * attributes are handled directly.
  18. */
  19. const struct xattr_handler *ceph_xattr_handlers[] = {
  20. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  21. &posix_acl_access_xattr_handler,
  22. &posix_acl_default_xattr_handler,
  23. #endif
  24. &ceph_other_xattr_handler,
  25. NULL,
  26. };
  27. static bool ceph_is_valid_xattr(const char *name)
  28. {
  29. return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  30. !strncmp(name, XATTR_SECURITY_PREFIX,
  31. XATTR_SECURITY_PREFIX_LEN) ||
  32. !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  33. !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  34. }
  35. /*
  36. * These define virtual xattrs exposing the recursive directory
  37. * statistics and layout metadata.
  38. */
  39. struct ceph_vxattr {
  40. char *name;
  41. size_t name_size; /* strlen(name) + 1 (for '\0') */
  42. size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  43. size_t size);
  44. bool (*exists_cb)(struct ceph_inode_info *ci);
  45. unsigned int flags;
  46. };
  47. #define VXATTR_FLAG_READONLY (1<<0)
  48. #define VXATTR_FLAG_HIDDEN (1<<1)
  49. #define VXATTR_FLAG_RSTAT (1<<2)
  50. /* layouts */
  51. static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  52. {
  53. struct ceph_file_layout *fl = &ci->i_layout;
  54. return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
  55. fl->object_size > 0 || fl->pool_id >= 0 ||
  56. rcu_dereference_raw(fl->pool_ns) != NULL);
  57. }
  58. static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  59. size_t size)
  60. {
  61. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  62. struct ceph_osd_client *osdc = &fsc->client->osdc;
  63. struct ceph_string *pool_ns;
  64. s64 pool = ci->i_layout.pool_id;
  65. const char *pool_name;
  66. const char *ns_field = " pool_namespace=";
  67. char buf[128];
  68. size_t len, total_len = 0;
  69. int ret;
  70. pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
  71. dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  72. down_read(&osdc->lock);
  73. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  74. if (pool_name) {
  75. len = snprintf(buf, sizeof(buf),
  76. "stripe_unit=%u stripe_count=%u object_size=%u pool=",
  77. ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  78. ci->i_layout.object_size);
  79. total_len = len + strlen(pool_name);
  80. } else {
  81. len = snprintf(buf, sizeof(buf),
  82. "stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
  83. ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
  84. ci->i_layout.object_size, (unsigned long long)pool);
  85. total_len = len;
  86. }
  87. if (pool_ns)
  88. total_len += strlen(ns_field) + pool_ns->len;
  89. if (!size) {
  90. ret = total_len;
  91. } else if (total_len > size) {
  92. ret = -ERANGE;
  93. } else {
  94. memcpy(val, buf, len);
  95. ret = len;
  96. if (pool_name) {
  97. len = strlen(pool_name);
  98. memcpy(val + ret, pool_name, len);
  99. ret += len;
  100. }
  101. if (pool_ns) {
  102. len = strlen(ns_field);
  103. memcpy(val + ret, ns_field, len);
  104. ret += len;
  105. memcpy(val + ret, pool_ns->str, pool_ns->len);
  106. ret += pool_ns->len;
  107. }
  108. }
  109. up_read(&osdc->lock);
  110. ceph_put_string(pool_ns);
  111. return ret;
  112. }
  113. static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  114. char *val, size_t size)
  115. {
  116. return snprintf(val, size, "%u", ci->i_layout.stripe_unit);
  117. }
  118. static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  119. char *val, size_t size)
  120. {
  121. return snprintf(val, size, "%u", ci->i_layout.stripe_count);
  122. }
  123. static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  124. char *val, size_t size)
  125. {
  126. return snprintf(val, size, "%u", ci->i_layout.object_size);
  127. }
  128. static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
  129. char *val, size_t size)
  130. {
  131. int ret;
  132. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  133. struct ceph_osd_client *osdc = &fsc->client->osdc;
  134. s64 pool = ci->i_layout.pool_id;
  135. const char *pool_name;
  136. down_read(&osdc->lock);
  137. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  138. if (pool_name)
  139. ret = snprintf(val, size, "%s", pool_name);
  140. else
  141. ret = snprintf(val, size, "%lld", (unsigned long long)pool);
  142. up_read(&osdc->lock);
  143. return ret;
  144. }
  145. static size_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
  146. char *val, size_t size)
  147. {
  148. int ret = 0;
  149. struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
  150. if (ns) {
  151. ret = snprintf(val, size, "%.*s", (int)ns->len, ns->str);
  152. ceph_put_string(ns);
  153. }
  154. return ret;
  155. }
  156. /* directories */
  157. static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
  158. size_t size)
  159. {
  160. return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
  161. }
  162. static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
  163. size_t size)
  164. {
  165. return snprintf(val, size, "%lld", ci->i_files);
  166. }
  167. static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
  168. size_t size)
  169. {
  170. return snprintf(val, size, "%lld", ci->i_subdirs);
  171. }
  172. static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
  173. size_t size)
  174. {
  175. return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
  176. }
  177. static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
  178. size_t size)
  179. {
  180. return snprintf(val, size, "%lld", ci->i_rfiles);
  181. }
  182. static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
  183. size_t size)
  184. {
  185. return snprintf(val, size, "%lld", ci->i_rsubdirs);
  186. }
  187. static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
  188. size_t size)
  189. {
  190. return snprintf(val, size, "%lld", ci->i_rbytes);
  191. }
  192. static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
  193. size_t size)
  194. {
  195. return snprintf(val, size, "%lld.09%ld", ci->i_rctime.tv_sec,
  196. ci->i_rctime.tv_nsec);
  197. }
  198. /* quotas */
  199. static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
  200. {
  201. bool ret = false;
  202. spin_lock(&ci->i_ceph_lock);
  203. if ((ci->i_max_files || ci->i_max_bytes) &&
  204. ci->i_vino.snap == CEPH_NOSNAP &&
  205. ci->i_snap_realm &&
  206. ci->i_snap_realm->ino == ci->i_vino.ino)
  207. ret = true;
  208. spin_unlock(&ci->i_ceph_lock);
  209. return ret;
  210. }
  211. static size_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
  212. size_t size)
  213. {
  214. return snprintf(val, size, "max_bytes=%llu max_files=%llu",
  215. ci->i_max_bytes, ci->i_max_files);
  216. }
  217. static size_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
  218. char *val, size_t size)
  219. {
  220. return snprintf(val, size, "%llu", ci->i_max_bytes);
  221. }
  222. static size_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
  223. char *val, size_t size)
  224. {
  225. return snprintf(val, size, "%llu", ci->i_max_files);
  226. }
  227. #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
  228. #define CEPH_XATTR_NAME2(_type, _name, _name2) \
  229. XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
  230. #define XATTR_NAME_CEPH(_type, _name, _flags) \
  231. { \
  232. .name = CEPH_XATTR_NAME(_type, _name), \
  233. .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
  234. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  235. .exists_cb = NULL, \
  236. .flags = (VXATTR_FLAG_READONLY | _flags), \
  237. }
  238. #define XATTR_RSTAT_FIELD(_type, _name) \
  239. XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
  240. #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
  241. { \
  242. .name = CEPH_XATTR_NAME2(_type, _name, _field), \
  243. .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
  244. .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
  245. .exists_cb = ceph_vxattrcb_layout_exists, \
  246. .flags = VXATTR_FLAG_HIDDEN, \
  247. }
  248. #define XATTR_QUOTA_FIELD(_type, _name) \
  249. { \
  250. .name = CEPH_XATTR_NAME(_type, _name), \
  251. .name_size = sizeof(CEPH_XATTR_NAME(_type, _name)), \
  252. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  253. .exists_cb = ceph_vxattrcb_quota_exists, \
  254. .flags = VXATTR_FLAG_HIDDEN, \
  255. }
  256. static struct ceph_vxattr ceph_dir_vxattrs[] = {
  257. {
  258. .name = "ceph.dir.layout",
  259. .name_size = sizeof("ceph.dir.layout"),
  260. .getxattr_cb = ceph_vxattrcb_layout,
  261. .exists_cb = ceph_vxattrcb_layout_exists,
  262. .flags = VXATTR_FLAG_HIDDEN,
  263. },
  264. XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  265. XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  266. XATTR_LAYOUT_FIELD(dir, layout, object_size),
  267. XATTR_LAYOUT_FIELD(dir, layout, pool),
  268. XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
  269. XATTR_NAME_CEPH(dir, entries, 0),
  270. XATTR_NAME_CEPH(dir, files, 0),
  271. XATTR_NAME_CEPH(dir, subdirs, 0),
  272. XATTR_RSTAT_FIELD(dir, rentries),
  273. XATTR_RSTAT_FIELD(dir, rfiles),
  274. XATTR_RSTAT_FIELD(dir, rsubdirs),
  275. XATTR_RSTAT_FIELD(dir, rbytes),
  276. XATTR_RSTAT_FIELD(dir, rctime),
  277. {
  278. .name = "ceph.quota",
  279. .name_size = sizeof("ceph.quota"),
  280. .getxattr_cb = ceph_vxattrcb_quota,
  281. .exists_cb = ceph_vxattrcb_quota_exists,
  282. .flags = VXATTR_FLAG_HIDDEN,
  283. },
  284. XATTR_QUOTA_FIELD(quota, max_bytes),
  285. XATTR_QUOTA_FIELD(quota, max_files),
  286. { .name = NULL, 0 } /* Required table terminator */
  287. };
  288. static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
  289. /* files */
  290. static struct ceph_vxattr ceph_file_vxattrs[] = {
  291. {
  292. .name = "ceph.file.layout",
  293. .name_size = sizeof("ceph.file.layout"),
  294. .getxattr_cb = ceph_vxattrcb_layout,
  295. .exists_cb = ceph_vxattrcb_layout_exists,
  296. .flags = VXATTR_FLAG_HIDDEN,
  297. },
  298. XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  299. XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  300. XATTR_LAYOUT_FIELD(file, layout, object_size),
  301. XATTR_LAYOUT_FIELD(file, layout, pool),
  302. XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
  303. { .name = NULL, 0 } /* Required table terminator */
  304. };
  305. static size_t ceph_file_vxattrs_name_size; /* total size of all names */
  306. static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
  307. {
  308. if (S_ISDIR(inode->i_mode))
  309. return ceph_dir_vxattrs;
  310. else if (S_ISREG(inode->i_mode))
  311. return ceph_file_vxattrs;
  312. return NULL;
  313. }
  314. static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
  315. {
  316. if (vxattrs == ceph_dir_vxattrs)
  317. return ceph_dir_vxattrs_name_size;
  318. if (vxattrs == ceph_file_vxattrs)
  319. return ceph_file_vxattrs_name_size;
  320. BUG_ON(vxattrs);
  321. return 0;
  322. }
  323. /*
  324. * Compute the aggregate size (including terminating '\0') of all
  325. * virtual extended attribute names in the given vxattr table.
  326. */
  327. static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
  328. {
  329. struct ceph_vxattr *vxattr;
  330. size_t size = 0;
  331. for (vxattr = vxattrs; vxattr->name; vxattr++) {
  332. if (!(vxattr->flags & VXATTR_FLAG_HIDDEN))
  333. size += vxattr->name_size;
  334. }
  335. return size;
  336. }
  337. /* Routines called at initialization and exit time */
  338. void __init ceph_xattr_init(void)
  339. {
  340. ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
  341. ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
  342. }
  343. void ceph_xattr_exit(void)
  344. {
  345. ceph_dir_vxattrs_name_size = 0;
  346. ceph_file_vxattrs_name_size = 0;
  347. }
  348. static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
  349. const char *name)
  350. {
  351. struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
  352. if (vxattr) {
  353. while (vxattr->name) {
  354. if (!strcmp(vxattr->name, name))
  355. return vxattr;
  356. vxattr++;
  357. }
  358. }
  359. return NULL;
  360. }
  361. static int __set_xattr(struct ceph_inode_info *ci,
  362. const char *name, int name_len,
  363. const char *val, int val_len,
  364. int flags, int update_xattr,
  365. struct ceph_inode_xattr **newxattr)
  366. {
  367. struct rb_node **p;
  368. struct rb_node *parent = NULL;
  369. struct ceph_inode_xattr *xattr = NULL;
  370. int c;
  371. int new = 0;
  372. p = &ci->i_xattrs.index.rb_node;
  373. while (*p) {
  374. parent = *p;
  375. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  376. c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
  377. if (c < 0)
  378. p = &(*p)->rb_left;
  379. else if (c > 0)
  380. p = &(*p)->rb_right;
  381. else {
  382. if (name_len == xattr->name_len)
  383. break;
  384. else if (name_len < xattr->name_len)
  385. p = &(*p)->rb_left;
  386. else
  387. p = &(*p)->rb_right;
  388. }
  389. xattr = NULL;
  390. }
  391. if (update_xattr) {
  392. int err = 0;
  393. if (xattr && (flags & XATTR_CREATE))
  394. err = -EEXIST;
  395. else if (!xattr && (flags & XATTR_REPLACE))
  396. err = -ENODATA;
  397. if (err) {
  398. kfree(name);
  399. kfree(val);
  400. kfree(*newxattr);
  401. return err;
  402. }
  403. if (update_xattr < 0) {
  404. if (xattr)
  405. __remove_xattr(ci, xattr);
  406. kfree(name);
  407. kfree(*newxattr);
  408. return 0;
  409. }
  410. }
  411. if (!xattr) {
  412. new = 1;
  413. xattr = *newxattr;
  414. xattr->name = name;
  415. xattr->name_len = name_len;
  416. xattr->should_free_name = update_xattr;
  417. ci->i_xattrs.count++;
  418. dout("__set_xattr count=%d\n", ci->i_xattrs.count);
  419. } else {
  420. kfree(*newxattr);
  421. *newxattr = NULL;
  422. if (xattr->should_free_val)
  423. kfree((void *)xattr->val);
  424. if (update_xattr) {
  425. kfree((void *)name);
  426. name = xattr->name;
  427. }
  428. ci->i_xattrs.names_size -= xattr->name_len;
  429. ci->i_xattrs.vals_size -= xattr->val_len;
  430. }
  431. ci->i_xattrs.names_size += name_len;
  432. ci->i_xattrs.vals_size += val_len;
  433. if (val)
  434. xattr->val = val;
  435. else
  436. xattr->val = "";
  437. xattr->val_len = val_len;
  438. xattr->dirty = update_xattr;
  439. xattr->should_free_val = (val && update_xattr);
  440. if (new) {
  441. rb_link_node(&xattr->node, parent, p);
  442. rb_insert_color(&xattr->node, &ci->i_xattrs.index);
  443. dout("__set_xattr_val p=%p\n", p);
  444. }
  445. dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
  446. ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
  447. return 0;
  448. }
  449. static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
  450. const char *name)
  451. {
  452. struct rb_node **p;
  453. struct rb_node *parent = NULL;
  454. struct ceph_inode_xattr *xattr = NULL;
  455. int name_len = strlen(name);
  456. int c;
  457. p = &ci->i_xattrs.index.rb_node;
  458. while (*p) {
  459. parent = *p;
  460. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  461. c = strncmp(name, xattr->name, xattr->name_len);
  462. if (c == 0 && name_len > xattr->name_len)
  463. c = 1;
  464. if (c < 0)
  465. p = &(*p)->rb_left;
  466. else if (c > 0)
  467. p = &(*p)->rb_right;
  468. else {
  469. dout("__get_xattr %s: found %.*s\n", name,
  470. xattr->val_len, xattr->val);
  471. return xattr;
  472. }
  473. }
  474. dout("__get_xattr %s: not found\n", name);
  475. return NULL;
  476. }
  477. static void __free_xattr(struct ceph_inode_xattr *xattr)
  478. {
  479. BUG_ON(!xattr);
  480. if (xattr->should_free_name)
  481. kfree((void *)xattr->name);
  482. if (xattr->should_free_val)
  483. kfree((void *)xattr->val);
  484. kfree(xattr);
  485. }
  486. static int __remove_xattr(struct ceph_inode_info *ci,
  487. struct ceph_inode_xattr *xattr)
  488. {
  489. if (!xattr)
  490. return -ENODATA;
  491. rb_erase(&xattr->node, &ci->i_xattrs.index);
  492. if (xattr->should_free_name)
  493. kfree((void *)xattr->name);
  494. if (xattr->should_free_val)
  495. kfree((void *)xattr->val);
  496. ci->i_xattrs.names_size -= xattr->name_len;
  497. ci->i_xattrs.vals_size -= xattr->val_len;
  498. ci->i_xattrs.count--;
  499. kfree(xattr);
  500. return 0;
  501. }
  502. static char *__copy_xattr_names(struct ceph_inode_info *ci,
  503. char *dest)
  504. {
  505. struct rb_node *p;
  506. struct ceph_inode_xattr *xattr = NULL;
  507. p = rb_first(&ci->i_xattrs.index);
  508. dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
  509. while (p) {
  510. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  511. memcpy(dest, xattr->name, xattr->name_len);
  512. dest[xattr->name_len] = '\0';
  513. dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
  514. xattr->name_len, ci->i_xattrs.names_size);
  515. dest += xattr->name_len + 1;
  516. p = rb_next(p);
  517. }
  518. return dest;
  519. }
  520. void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
  521. {
  522. struct rb_node *p, *tmp;
  523. struct ceph_inode_xattr *xattr = NULL;
  524. p = rb_first(&ci->i_xattrs.index);
  525. dout("__ceph_destroy_xattrs p=%p\n", p);
  526. while (p) {
  527. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  528. tmp = p;
  529. p = rb_next(tmp);
  530. dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
  531. xattr->name_len, xattr->name);
  532. rb_erase(tmp, &ci->i_xattrs.index);
  533. __free_xattr(xattr);
  534. }
  535. ci->i_xattrs.names_size = 0;
  536. ci->i_xattrs.vals_size = 0;
  537. ci->i_xattrs.index_version = 0;
  538. ci->i_xattrs.count = 0;
  539. ci->i_xattrs.index = RB_ROOT;
  540. }
  541. static int __build_xattrs(struct inode *inode)
  542. __releases(ci->i_ceph_lock)
  543. __acquires(ci->i_ceph_lock)
  544. {
  545. u32 namelen;
  546. u32 numattr = 0;
  547. void *p, *end;
  548. u32 len;
  549. const char *name, *val;
  550. struct ceph_inode_info *ci = ceph_inode(inode);
  551. int xattr_version;
  552. struct ceph_inode_xattr **xattrs = NULL;
  553. int err = 0;
  554. int i;
  555. dout("__build_xattrs() len=%d\n",
  556. ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
  557. if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
  558. return 0; /* already built */
  559. __ceph_destroy_xattrs(ci);
  560. start:
  561. /* updated internal xattr rb tree */
  562. if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
  563. p = ci->i_xattrs.blob->vec.iov_base;
  564. end = p + ci->i_xattrs.blob->vec.iov_len;
  565. ceph_decode_32_safe(&p, end, numattr, bad);
  566. xattr_version = ci->i_xattrs.version;
  567. spin_unlock(&ci->i_ceph_lock);
  568. xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
  569. GFP_NOFS);
  570. err = -ENOMEM;
  571. if (!xattrs)
  572. goto bad_lock;
  573. for (i = 0; i < numattr; i++) {
  574. xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
  575. GFP_NOFS);
  576. if (!xattrs[i])
  577. goto bad_lock;
  578. }
  579. spin_lock(&ci->i_ceph_lock);
  580. if (ci->i_xattrs.version != xattr_version) {
  581. /* lost a race, retry */
  582. for (i = 0; i < numattr; i++)
  583. kfree(xattrs[i]);
  584. kfree(xattrs);
  585. xattrs = NULL;
  586. goto start;
  587. }
  588. err = -EIO;
  589. while (numattr--) {
  590. ceph_decode_32_safe(&p, end, len, bad);
  591. namelen = len;
  592. name = p;
  593. p += len;
  594. ceph_decode_32_safe(&p, end, len, bad);
  595. val = p;
  596. p += len;
  597. err = __set_xattr(ci, name, namelen, val, len,
  598. 0, 0, &xattrs[numattr]);
  599. if (err < 0)
  600. goto bad;
  601. }
  602. kfree(xattrs);
  603. }
  604. ci->i_xattrs.index_version = ci->i_xattrs.version;
  605. ci->i_xattrs.dirty = false;
  606. return err;
  607. bad_lock:
  608. spin_lock(&ci->i_ceph_lock);
  609. bad:
  610. if (xattrs) {
  611. for (i = 0; i < numattr; i++)
  612. kfree(xattrs[i]);
  613. kfree(xattrs);
  614. }
  615. ci->i_xattrs.names_size = 0;
  616. return err;
  617. }
  618. static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
  619. int val_size)
  620. {
  621. /*
  622. * 4 bytes for the length, and additional 4 bytes per each xattr name,
  623. * 4 bytes per each value
  624. */
  625. int size = 4 + ci->i_xattrs.count*(4 + 4) +
  626. ci->i_xattrs.names_size +
  627. ci->i_xattrs.vals_size;
  628. dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
  629. ci->i_xattrs.count, ci->i_xattrs.names_size,
  630. ci->i_xattrs.vals_size);
  631. if (name_size)
  632. size += 4 + 4 + name_size + val_size;
  633. return size;
  634. }
  635. /*
  636. * If there are dirty xattrs, reencode xattrs into the prealloc_blob
  637. * and swap into place.
  638. */
  639. void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
  640. {
  641. struct rb_node *p;
  642. struct ceph_inode_xattr *xattr = NULL;
  643. void *dest;
  644. dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
  645. if (ci->i_xattrs.dirty) {
  646. int need = __get_required_blob_size(ci, 0, 0);
  647. BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
  648. p = rb_first(&ci->i_xattrs.index);
  649. dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
  650. ceph_encode_32(&dest, ci->i_xattrs.count);
  651. while (p) {
  652. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  653. ceph_encode_32(&dest, xattr->name_len);
  654. memcpy(dest, xattr->name, xattr->name_len);
  655. dest += xattr->name_len;
  656. ceph_encode_32(&dest, xattr->val_len);
  657. memcpy(dest, xattr->val, xattr->val_len);
  658. dest += xattr->val_len;
  659. p = rb_next(p);
  660. }
  661. /* adjust buffer len; it may be larger than we need */
  662. ci->i_xattrs.prealloc_blob->vec.iov_len =
  663. dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
  664. if (ci->i_xattrs.blob)
  665. ceph_buffer_put(ci->i_xattrs.blob);
  666. ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
  667. ci->i_xattrs.prealloc_blob = NULL;
  668. ci->i_xattrs.dirty = false;
  669. ci->i_xattrs.version++;
  670. }
  671. }
  672. static inline int __get_request_mask(struct inode *in) {
  673. struct ceph_mds_request *req = current->journal_info;
  674. int mask = 0;
  675. if (req && req->r_target_inode == in) {
  676. if (req->r_op == CEPH_MDS_OP_LOOKUP ||
  677. req->r_op == CEPH_MDS_OP_LOOKUPINO ||
  678. req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
  679. req->r_op == CEPH_MDS_OP_GETATTR) {
  680. mask = le32_to_cpu(req->r_args.getattr.mask);
  681. } else if (req->r_op == CEPH_MDS_OP_OPEN ||
  682. req->r_op == CEPH_MDS_OP_CREATE) {
  683. mask = le32_to_cpu(req->r_args.open.mask);
  684. }
  685. }
  686. return mask;
  687. }
  688. ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
  689. size_t size)
  690. {
  691. struct ceph_inode_info *ci = ceph_inode(inode);
  692. struct ceph_inode_xattr *xattr;
  693. struct ceph_vxattr *vxattr = NULL;
  694. int req_mask;
  695. int err;
  696. /* let's see if a virtual xattr was requested */
  697. vxattr = ceph_match_vxattr(inode, name);
  698. if (vxattr) {
  699. int mask = 0;
  700. if (vxattr->flags & VXATTR_FLAG_RSTAT)
  701. mask |= CEPH_STAT_RSTAT;
  702. err = ceph_do_getattr(inode, mask, true);
  703. if (err)
  704. return err;
  705. err = -ENODATA;
  706. if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
  707. err = vxattr->getxattr_cb(ci, value, size);
  708. return err;
  709. }
  710. req_mask = __get_request_mask(inode);
  711. spin_lock(&ci->i_ceph_lock);
  712. dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
  713. ci->i_xattrs.version, ci->i_xattrs.index_version);
  714. if (ci->i_xattrs.version == 0 ||
  715. !((req_mask & CEPH_CAP_XATTR_SHARED) ||
  716. __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
  717. spin_unlock(&ci->i_ceph_lock);
  718. /* security module gets xattr while filling trace */
  719. if (current->journal_info) {
  720. pr_warn_ratelimited("sync getxattr %p "
  721. "during filling trace\n", inode);
  722. return -EBUSY;
  723. }
  724. /* get xattrs from mds (if we don't already have them) */
  725. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
  726. if (err)
  727. return err;
  728. spin_lock(&ci->i_ceph_lock);
  729. }
  730. err = __build_xattrs(inode);
  731. if (err < 0)
  732. goto out;
  733. err = -ENODATA; /* == ENOATTR */
  734. xattr = __get_xattr(ci, name);
  735. if (!xattr)
  736. goto out;
  737. err = -ERANGE;
  738. if (size && size < xattr->val_len)
  739. goto out;
  740. err = xattr->val_len;
  741. if (size == 0)
  742. goto out;
  743. memcpy(value, xattr->val, xattr->val_len);
  744. if (current->journal_info &&
  745. !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
  746. ci->i_ceph_flags |= CEPH_I_SEC_INITED;
  747. out:
  748. spin_unlock(&ci->i_ceph_lock);
  749. return err;
  750. }
  751. ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  752. {
  753. struct inode *inode = d_inode(dentry);
  754. struct ceph_inode_info *ci = ceph_inode(inode);
  755. struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
  756. u32 vir_namelen = 0;
  757. u32 namelen;
  758. int err;
  759. u32 len;
  760. int i;
  761. spin_lock(&ci->i_ceph_lock);
  762. dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
  763. ci->i_xattrs.version, ci->i_xattrs.index_version);
  764. if (ci->i_xattrs.version == 0 ||
  765. !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
  766. spin_unlock(&ci->i_ceph_lock);
  767. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
  768. if (err)
  769. return err;
  770. spin_lock(&ci->i_ceph_lock);
  771. }
  772. err = __build_xattrs(inode);
  773. if (err < 0)
  774. goto out;
  775. /*
  776. * Start with virtual dir xattr names (if any) (including
  777. * terminating '\0' characters for each).
  778. */
  779. vir_namelen = ceph_vxattrs_name_size(vxattrs);
  780. /* adding 1 byte per each variable due to the null termination */
  781. namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
  782. err = -ERANGE;
  783. if (size && vir_namelen + namelen > size)
  784. goto out;
  785. err = namelen + vir_namelen;
  786. if (size == 0)
  787. goto out;
  788. names = __copy_xattr_names(ci, names);
  789. /* virtual xattr names, too */
  790. err = namelen;
  791. if (vxattrs) {
  792. for (i = 0; vxattrs[i].name; i++) {
  793. if (!(vxattrs[i].flags & VXATTR_FLAG_HIDDEN) &&
  794. !(vxattrs[i].exists_cb &&
  795. !vxattrs[i].exists_cb(ci))) {
  796. len = sprintf(names, "%s", vxattrs[i].name);
  797. names += len + 1;
  798. err += len + 1;
  799. }
  800. }
  801. }
  802. out:
  803. spin_unlock(&ci->i_ceph_lock);
  804. return err;
  805. }
  806. static int ceph_sync_setxattr(struct inode *inode, const char *name,
  807. const char *value, size_t size, int flags)
  808. {
  809. struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
  810. struct ceph_inode_info *ci = ceph_inode(inode);
  811. struct ceph_mds_request *req;
  812. struct ceph_mds_client *mdsc = fsc->mdsc;
  813. struct ceph_pagelist *pagelist = NULL;
  814. int op = CEPH_MDS_OP_SETXATTR;
  815. int err;
  816. if (size > 0) {
  817. /* copy value into pagelist */
  818. pagelist = ceph_pagelist_alloc(GFP_NOFS);
  819. if (!pagelist)
  820. return -ENOMEM;
  821. err = ceph_pagelist_append(pagelist, value, size);
  822. if (err)
  823. goto out;
  824. } else if (!value) {
  825. if (flags & CEPH_XATTR_REPLACE)
  826. op = CEPH_MDS_OP_RMXATTR;
  827. else
  828. flags |= CEPH_XATTR_REMOVE;
  829. }
  830. dout("setxattr value=%.*s\n", (int)size, value);
  831. /* do request */
  832. req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
  833. if (IS_ERR(req)) {
  834. err = PTR_ERR(req);
  835. goto out;
  836. }
  837. req->r_path2 = kstrdup(name, GFP_NOFS);
  838. if (!req->r_path2) {
  839. ceph_mdsc_put_request(req);
  840. err = -ENOMEM;
  841. goto out;
  842. }
  843. if (op == CEPH_MDS_OP_SETXATTR) {
  844. req->r_args.setxattr.flags = cpu_to_le32(flags);
  845. req->r_pagelist = pagelist;
  846. pagelist = NULL;
  847. }
  848. req->r_inode = inode;
  849. ihold(inode);
  850. req->r_num_caps = 1;
  851. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  852. dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
  853. err = ceph_mdsc_do_request(mdsc, NULL, req);
  854. ceph_mdsc_put_request(req);
  855. dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
  856. out:
  857. if (pagelist)
  858. ceph_pagelist_release(pagelist);
  859. return err;
  860. }
  861. int __ceph_setxattr(struct inode *inode, const char *name,
  862. const void *value, size_t size, int flags)
  863. {
  864. struct ceph_vxattr *vxattr;
  865. struct ceph_inode_info *ci = ceph_inode(inode);
  866. struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
  867. struct ceph_cap_flush *prealloc_cf = NULL;
  868. int issued;
  869. int err;
  870. int dirty = 0;
  871. int name_len = strlen(name);
  872. int val_len = size;
  873. char *newname = NULL;
  874. char *newval = NULL;
  875. struct ceph_inode_xattr *xattr = NULL;
  876. int required_blob_size;
  877. bool check_realm = false;
  878. bool lock_snap_rwsem = false;
  879. if (ceph_snap(inode) != CEPH_NOSNAP)
  880. return -EROFS;
  881. vxattr = ceph_match_vxattr(inode, name);
  882. if (vxattr) {
  883. if (vxattr->flags & VXATTR_FLAG_READONLY)
  884. return -EOPNOTSUPP;
  885. if (value && !strncmp(vxattr->name, "ceph.quota", 10))
  886. check_realm = true;
  887. }
  888. /* pass any unhandled ceph.* xattrs through to the MDS */
  889. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  890. goto do_sync_unlocked;
  891. /* preallocate memory for xattr name, value, index node */
  892. err = -ENOMEM;
  893. newname = kmemdup(name, name_len + 1, GFP_NOFS);
  894. if (!newname)
  895. goto out;
  896. if (val_len) {
  897. newval = kmemdup(value, val_len, GFP_NOFS);
  898. if (!newval)
  899. goto out;
  900. }
  901. xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
  902. if (!xattr)
  903. goto out;
  904. prealloc_cf = ceph_alloc_cap_flush();
  905. if (!prealloc_cf)
  906. goto out;
  907. spin_lock(&ci->i_ceph_lock);
  908. retry:
  909. issued = __ceph_caps_issued(ci, NULL);
  910. if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
  911. goto do_sync;
  912. if (!lock_snap_rwsem && !ci->i_head_snapc) {
  913. lock_snap_rwsem = true;
  914. if (!down_read_trylock(&mdsc->snap_rwsem)) {
  915. spin_unlock(&ci->i_ceph_lock);
  916. down_read(&mdsc->snap_rwsem);
  917. spin_lock(&ci->i_ceph_lock);
  918. goto retry;
  919. }
  920. }
  921. dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
  922. __build_xattrs(inode);
  923. required_blob_size = __get_required_blob_size(ci, name_len, val_len);
  924. if (!ci->i_xattrs.prealloc_blob ||
  925. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  926. struct ceph_buffer *blob;
  927. spin_unlock(&ci->i_ceph_lock);
  928. dout(" preaallocating new blob size=%d\n", required_blob_size);
  929. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  930. if (!blob)
  931. goto do_sync_unlocked;
  932. spin_lock(&ci->i_ceph_lock);
  933. if (ci->i_xattrs.prealloc_blob)
  934. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  935. ci->i_xattrs.prealloc_blob = blob;
  936. goto retry;
  937. }
  938. err = __set_xattr(ci, newname, name_len, newval, val_len,
  939. flags, value ? 1 : -1, &xattr);
  940. if (!err) {
  941. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
  942. &prealloc_cf);
  943. ci->i_xattrs.dirty = true;
  944. inode->i_ctime = current_time(inode);
  945. }
  946. spin_unlock(&ci->i_ceph_lock);
  947. if (lock_snap_rwsem)
  948. up_read(&mdsc->snap_rwsem);
  949. if (dirty)
  950. __mark_inode_dirty(inode, dirty);
  951. ceph_free_cap_flush(prealloc_cf);
  952. return err;
  953. do_sync:
  954. spin_unlock(&ci->i_ceph_lock);
  955. do_sync_unlocked:
  956. if (lock_snap_rwsem)
  957. up_read(&mdsc->snap_rwsem);
  958. /* security module set xattr while filling trace */
  959. if (current->journal_info) {
  960. pr_warn_ratelimited("sync setxattr %p "
  961. "during filling trace\n", inode);
  962. err = -EBUSY;
  963. } else {
  964. err = ceph_sync_setxattr(inode, name, value, size, flags);
  965. if (err >= 0 && check_realm) {
  966. /* check if snaprealm was created for quota inode */
  967. spin_lock(&ci->i_ceph_lock);
  968. if ((ci->i_max_files || ci->i_max_bytes) &&
  969. !(ci->i_snap_realm &&
  970. ci->i_snap_realm->ino == ci->i_vino.ino))
  971. err = -EOPNOTSUPP;
  972. spin_unlock(&ci->i_ceph_lock);
  973. }
  974. }
  975. out:
  976. ceph_free_cap_flush(prealloc_cf);
  977. kfree(newname);
  978. kfree(newval);
  979. kfree(xattr);
  980. return err;
  981. }
  982. static int ceph_get_xattr_handler(const struct xattr_handler *handler,
  983. struct dentry *dentry, struct inode *inode,
  984. const char *name, void *value, size_t size)
  985. {
  986. if (!ceph_is_valid_xattr(name))
  987. return -EOPNOTSUPP;
  988. return __ceph_getxattr(inode, name, value, size);
  989. }
  990. static int ceph_set_xattr_handler(const struct xattr_handler *handler,
  991. struct dentry *unused, struct inode *inode,
  992. const char *name, const void *value,
  993. size_t size, int flags)
  994. {
  995. if (!ceph_is_valid_xattr(name))
  996. return -EOPNOTSUPP;
  997. return __ceph_setxattr(inode, name, value, size, flags);
  998. }
  999. static const struct xattr_handler ceph_other_xattr_handler = {
  1000. .prefix = "", /* match any name => handlers called with full name */
  1001. .get = ceph_get_xattr_handler,
  1002. .set = ceph_set_xattr_handler,
  1003. };
  1004. #ifdef CONFIG_SECURITY
  1005. bool ceph_security_xattr_wanted(struct inode *in)
  1006. {
  1007. return in->i_security != NULL;
  1008. }
  1009. bool ceph_security_xattr_deadlock(struct inode *in)
  1010. {
  1011. struct ceph_inode_info *ci;
  1012. bool ret;
  1013. if (!in->i_security)
  1014. return false;
  1015. ci = ceph_inode(in);
  1016. spin_lock(&ci->i_ceph_lock);
  1017. ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
  1018. !(ci->i_xattrs.version > 0 &&
  1019. __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
  1020. spin_unlock(&ci->i_ceph_lock);
  1021. return ret;
  1022. }
  1023. #endif