xattr.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092
  1. #include <linux/ceph/ceph_debug.h>
  2. #include "super.h"
  3. #include "mds_client.h"
  4. #include <linux/ceph/decode.h>
  5. #include <linux/xattr.h>
  6. #include <linux/posix_acl_xattr.h>
  7. #include <linux/slab.h>
  8. #define XATTR_CEPH_PREFIX "ceph."
  9. #define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
  10. /*
  11. * List of handlers for synthetic system.* attributes. Other
  12. * attributes are handled directly.
  13. */
  14. const struct xattr_handler *ceph_xattr_handlers[] = {
  15. #ifdef CONFIG_CEPH_FS_POSIX_ACL
  16. &posix_acl_access_xattr_handler,
  17. &posix_acl_default_xattr_handler,
  18. #endif
  19. NULL,
  20. };
  21. static bool ceph_is_valid_xattr(const char *name)
  22. {
  23. return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  24. !strncmp(name, XATTR_SECURITY_PREFIX,
  25. XATTR_SECURITY_PREFIX_LEN) ||
  26. !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
  27. !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  28. !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  29. }
  30. /*
  31. * These define virtual xattrs exposing the recursive directory
  32. * statistics and layout metadata.
  33. */
  34. struct ceph_vxattr {
  35. char *name;
  36. size_t name_size; /* strlen(name) + 1 (for '\0') */
  37. size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  38. size_t size);
  39. bool readonly, hidden;
  40. bool (*exists_cb)(struct ceph_inode_info *ci);
  41. };
  42. /* layouts */
  43. static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  44. {
  45. size_t s;
  46. char *p = (char *)&ci->i_layout;
  47. for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  48. if (*p)
  49. return true;
  50. return false;
  51. }
  52. static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  53. size_t size)
  54. {
  55. int ret;
  56. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  57. struct ceph_osd_client *osdc = &fsc->client->osdc;
  58. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  59. const char *pool_name;
  60. dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  61. down_read(&osdc->map_sem);
  62. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  63. if (pool_name)
  64. ret = snprintf(val, size,
  65. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
  66. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  67. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  68. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  69. pool_name);
  70. else
  71. ret = snprintf(val, size,
  72. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  73. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  74. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  75. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  76. (unsigned long long)pool);
  77. up_read(&osdc->map_sem);
  78. return ret;
  79. }
  80. static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  81. char *val, size_t size)
  82. {
  83. return snprintf(val, size, "%lld",
  84. (unsigned long long)ceph_file_layout_su(ci->i_layout));
  85. }
  86. static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  87. char *val, size_t size)
  88. {
  89. return snprintf(val, size, "%lld",
  90. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
  91. }
  92. static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  93. char *val, size_t size)
  94. {
  95. return snprintf(val, size, "%lld",
  96. (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  97. }
  98. static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
  99. char *val, size_t size)
  100. {
  101. int ret;
  102. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  103. struct ceph_osd_client *osdc = &fsc->client->osdc;
  104. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  105. const char *pool_name;
  106. down_read(&osdc->map_sem);
  107. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  108. if (pool_name)
  109. ret = snprintf(val, size, "%s", pool_name);
  110. else
  111. ret = snprintf(val, size, "%lld", (unsigned long long)pool);
  112. up_read(&osdc->map_sem);
  113. return ret;
  114. }
  115. /* directories */
  116. static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
  117. size_t size)
  118. {
  119. return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
  120. }
  121. static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
  122. size_t size)
  123. {
  124. return snprintf(val, size, "%lld", ci->i_files);
  125. }
  126. static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
  127. size_t size)
  128. {
  129. return snprintf(val, size, "%lld", ci->i_subdirs);
  130. }
  131. static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
  132. size_t size)
  133. {
  134. return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
  135. }
  136. static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
  137. size_t size)
  138. {
  139. return snprintf(val, size, "%lld", ci->i_rfiles);
  140. }
  141. static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
  142. size_t size)
  143. {
  144. return snprintf(val, size, "%lld", ci->i_rsubdirs);
  145. }
  146. static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
  147. size_t size)
  148. {
  149. return snprintf(val, size, "%lld", ci->i_rbytes);
  150. }
  151. static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
  152. size_t size)
  153. {
  154. return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
  155. (long)ci->i_rctime.tv_nsec);
  156. }
  157. #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
  158. #define CEPH_XATTR_NAME2(_type, _name, _name2) \
  159. XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
  160. #define XATTR_NAME_CEPH(_type, _name) \
  161. { \
  162. .name = CEPH_XATTR_NAME(_type, _name), \
  163. .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
  164. .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
  165. .readonly = true, \
  166. .hidden = false, \
  167. .exists_cb = NULL, \
  168. }
  169. #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
  170. { \
  171. .name = CEPH_XATTR_NAME2(_type, _name, _field), \
  172. .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
  173. .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
  174. .readonly = false, \
  175. .hidden = true, \
  176. .exists_cb = ceph_vxattrcb_layout_exists, \
  177. }
  178. static struct ceph_vxattr ceph_dir_vxattrs[] = {
  179. {
  180. .name = "ceph.dir.layout",
  181. .name_size = sizeof("ceph.dir.layout"),
  182. .getxattr_cb = ceph_vxattrcb_layout,
  183. .readonly = false,
  184. .hidden = false,
  185. .exists_cb = ceph_vxattrcb_layout_exists,
  186. },
  187. XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
  188. XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
  189. XATTR_LAYOUT_FIELD(dir, layout, object_size),
  190. XATTR_LAYOUT_FIELD(dir, layout, pool),
  191. XATTR_NAME_CEPH(dir, entries),
  192. XATTR_NAME_CEPH(dir, files),
  193. XATTR_NAME_CEPH(dir, subdirs),
  194. XATTR_NAME_CEPH(dir, rentries),
  195. XATTR_NAME_CEPH(dir, rfiles),
  196. XATTR_NAME_CEPH(dir, rsubdirs),
  197. XATTR_NAME_CEPH(dir, rbytes),
  198. XATTR_NAME_CEPH(dir, rctime),
  199. { .name = NULL, 0 } /* Required table terminator */
  200. };
  201. static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
  202. /* files */
  203. static struct ceph_vxattr ceph_file_vxattrs[] = {
  204. {
  205. .name = "ceph.file.layout",
  206. .name_size = sizeof("ceph.file.layout"),
  207. .getxattr_cb = ceph_vxattrcb_layout,
  208. .readonly = false,
  209. .hidden = false,
  210. .exists_cb = ceph_vxattrcb_layout_exists,
  211. },
  212. XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
  213. XATTR_LAYOUT_FIELD(file, layout, stripe_count),
  214. XATTR_LAYOUT_FIELD(file, layout, object_size),
  215. XATTR_LAYOUT_FIELD(file, layout, pool),
  216. { .name = NULL, 0 } /* Required table terminator */
  217. };
  218. static size_t ceph_file_vxattrs_name_size; /* total size of all names */
  219. static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
  220. {
  221. if (S_ISDIR(inode->i_mode))
  222. return ceph_dir_vxattrs;
  223. else if (S_ISREG(inode->i_mode))
  224. return ceph_file_vxattrs;
  225. return NULL;
  226. }
  227. static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
  228. {
  229. if (vxattrs == ceph_dir_vxattrs)
  230. return ceph_dir_vxattrs_name_size;
  231. if (vxattrs == ceph_file_vxattrs)
  232. return ceph_file_vxattrs_name_size;
  233. BUG();
  234. return 0;
  235. }
  236. /*
  237. * Compute the aggregate size (including terminating '\0') of all
  238. * virtual extended attribute names in the given vxattr table.
  239. */
  240. static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
  241. {
  242. struct ceph_vxattr *vxattr;
  243. size_t size = 0;
  244. for (vxattr = vxattrs; vxattr->name; vxattr++)
  245. if (!vxattr->hidden)
  246. size += vxattr->name_size;
  247. return size;
  248. }
  249. /* Routines called at initialization and exit time */
  250. void __init ceph_xattr_init(void)
  251. {
  252. ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
  253. ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
  254. }
  255. void ceph_xattr_exit(void)
  256. {
  257. ceph_dir_vxattrs_name_size = 0;
  258. ceph_file_vxattrs_name_size = 0;
  259. }
  260. static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
  261. const char *name)
  262. {
  263. struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
  264. if (vxattr) {
  265. while (vxattr->name) {
  266. if (!strcmp(vxattr->name, name))
  267. return vxattr;
  268. vxattr++;
  269. }
  270. }
  271. return NULL;
  272. }
  273. static int __set_xattr(struct ceph_inode_info *ci,
  274. const char *name, int name_len,
  275. const char *val, int val_len,
  276. int dirty,
  277. int should_free_name, int should_free_val,
  278. struct ceph_inode_xattr **newxattr)
  279. {
  280. struct rb_node **p;
  281. struct rb_node *parent = NULL;
  282. struct ceph_inode_xattr *xattr = NULL;
  283. int c;
  284. int new = 0;
  285. p = &ci->i_xattrs.index.rb_node;
  286. while (*p) {
  287. parent = *p;
  288. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  289. c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
  290. if (c < 0)
  291. p = &(*p)->rb_left;
  292. else if (c > 0)
  293. p = &(*p)->rb_right;
  294. else {
  295. if (name_len == xattr->name_len)
  296. break;
  297. else if (name_len < xattr->name_len)
  298. p = &(*p)->rb_left;
  299. else
  300. p = &(*p)->rb_right;
  301. }
  302. xattr = NULL;
  303. }
  304. if (!xattr) {
  305. new = 1;
  306. xattr = *newxattr;
  307. xattr->name = name;
  308. xattr->name_len = name_len;
  309. xattr->should_free_name = should_free_name;
  310. ci->i_xattrs.count++;
  311. dout("__set_xattr count=%d\n", ci->i_xattrs.count);
  312. } else {
  313. kfree(*newxattr);
  314. *newxattr = NULL;
  315. if (xattr->should_free_val)
  316. kfree((void *)xattr->val);
  317. if (should_free_name) {
  318. kfree((void *)name);
  319. name = xattr->name;
  320. }
  321. ci->i_xattrs.names_size -= xattr->name_len;
  322. ci->i_xattrs.vals_size -= xattr->val_len;
  323. }
  324. ci->i_xattrs.names_size += name_len;
  325. ci->i_xattrs.vals_size += val_len;
  326. if (val)
  327. xattr->val = val;
  328. else
  329. xattr->val = "";
  330. xattr->val_len = val_len;
  331. xattr->dirty = dirty;
  332. xattr->should_free_val = (val && should_free_val);
  333. if (new) {
  334. rb_link_node(&xattr->node, parent, p);
  335. rb_insert_color(&xattr->node, &ci->i_xattrs.index);
  336. dout("__set_xattr_val p=%p\n", p);
  337. }
  338. dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
  339. ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
  340. return 0;
  341. }
  342. static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
  343. const char *name)
  344. {
  345. struct rb_node **p;
  346. struct rb_node *parent = NULL;
  347. struct ceph_inode_xattr *xattr = NULL;
  348. int name_len = strlen(name);
  349. int c;
  350. p = &ci->i_xattrs.index.rb_node;
  351. while (*p) {
  352. parent = *p;
  353. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  354. c = strncmp(name, xattr->name, xattr->name_len);
  355. if (c == 0 && name_len > xattr->name_len)
  356. c = 1;
  357. if (c < 0)
  358. p = &(*p)->rb_left;
  359. else if (c > 0)
  360. p = &(*p)->rb_right;
  361. else {
  362. dout("__get_xattr %s: found %.*s\n", name,
  363. xattr->val_len, xattr->val);
  364. return xattr;
  365. }
  366. }
  367. dout("__get_xattr %s: not found\n", name);
  368. return NULL;
  369. }
  370. static void __free_xattr(struct ceph_inode_xattr *xattr)
  371. {
  372. BUG_ON(!xattr);
  373. if (xattr->should_free_name)
  374. kfree((void *)xattr->name);
  375. if (xattr->should_free_val)
  376. kfree((void *)xattr->val);
  377. kfree(xattr);
  378. }
  379. static int __remove_xattr(struct ceph_inode_info *ci,
  380. struct ceph_inode_xattr *xattr)
  381. {
  382. if (!xattr)
  383. return -EOPNOTSUPP;
  384. rb_erase(&xattr->node, &ci->i_xattrs.index);
  385. if (xattr->should_free_name)
  386. kfree((void *)xattr->name);
  387. if (xattr->should_free_val)
  388. kfree((void *)xattr->val);
  389. ci->i_xattrs.names_size -= xattr->name_len;
  390. ci->i_xattrs.vals_size -= xattr->val_len;
  391. ci->i_xattrs.count--;
  392. kfree(xattr);
  393. return 0;
  394. }
  395. static int __remove_xattr_by_name(struct ceph_inode_info *ci,
  396. const char *name)
  397. {
  398. struct rb_node **p;
  399. struct ceph_inode_xattr *xattr;
  400. int err;
  401. p = &ci->i_xattrs.index.rb_node;
  402. xattr = __get_xattr(ci, name);
  403. err = __remove_xattr(ci, xattr);
  404. return err;
  405. }
  406. static char *__copy_xattr_names(struct ceph_inode_info *ci,
  407. char *dest)
  408. {
  409. struct rb_node *p;
  410. struct ceph_inode_xattr *xattr = NULL;
  411. p = rb_first(&ci->i_xattrs.index);
  412. dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
  413. while (p) {
  414. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  415. memcpy(dest, xattr->name, xattr->name_len);
  416. dest[xattr->name_len] = '\0';
  417. dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
  418. xattr->name_len, ci->i_xattrs.names_size);
  419. dest += xattr->name_len + 1;
  420. p = rb_next(p);
  421. }
  422. return dest;
  423. }
  424. void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
  425. {
  426. struct rb_node *p, *tmp;
  427. struct ceph_inode_xattr *xattr = NULL;
  428. p = rb_first(&ci->i_xattrs.index);
  429. dout("__ceph_destroy_xattrs p=%p\n", p);
  430. while (p) {
  431. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  432. tmp = p;
  433. p = rb_next(tmp);
  434. dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
  435. xattr->name_len, xattr->name);
  436. rb_erase(tmp, &ci->i_xattrs.index);
  437. __free_xattr(xattr);
  438. }
  439. ci->i_xattrs.names_size = 0;
  440. ci->i_xattrs.vals_size = 0;
  441. ci->i_xattrs.index_version = 0;
  442. ci->i_xattrs.count = 0;
  443. ci->i_xattrs.index = RB_ROOT;
  444. }
  445. static int __build_xattrs(struct inode *inode)
  446. __releases(ci->i_ceph_lock)
  447. __acquires(ci->i_ceph_lock)
  448. {
  449. u32 namelen;
  450. u32 numattr = 0;
  451. void *p, *end;
  452. u32 len;
  453. const char *name, *val;
  454. struct ceph_inode_info *ci = ceph_inode(inode);
  455. int xattr_version;
  456. struct ceph_inode_xattr **xattrs = NULL;
  457. int err = 0;
  458. int i;
  459. dout("__build_xattrs() len=%d\n",
  460. ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
  461. if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
  462. return 0; /* already built */
  463. __ceph_destroy_xattrs(ci);
  464. start:
  465. /* updated internal xattr rb tree */
  466. if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
  467. p = ci->i_xattrs.blob->vec.iov_base;
  468. end = p + ci->i_xattrs.blob->vec.iov_len;
  469. ceph_decode_32_safe(&p, end, numattr, bad);
  470. xattr_version = ci->i_xattrs.version;
  471. spin_unlock(&ci->i_ceph_lock);
  472. xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
  473. GFP_NOFS);
  474. err = -ENOMEM;
  475. if (!xattrs)
  476. goto bad_lock;
  477. memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
  478. for (i = 0; i < numattr; i++) {
  479. xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
  480. GFP_NOFS);
  481. if (!xattrs[i])
  482. goto bad_lock;
  483. }
  484. spin_lock(&ci->i_ceph_lock);
  485. if (ci->i_xattrs.version != xattr_version) {
  486. /* lost a race, retry */
  487. for (i = 0; i < numattr; i++)
  488. kfree(xattrs[i]);
  489. kfree(xattrs);
  490. xattrs = NULL;
  491. goto start;
  492. }
  493. err = -EIO;
  494. while (numattr--) {
  495. ceph_decode_32_safe(&p, end, len, bad);
  496. namelen = len;
  497. name = p;
  498. p += len;
  499. ceph_decode_32_safe(&p, end, len, bad);
  500. val = p;
  501. p += len;
  502. err = __set_xattr(ci, name, namelen, val, len,
  503. 0, 0, 0, &xattrs[numattr]);
  504. if (err < 0)
  505. goto bad;
  506. }
  507. kfree(xattrs);
  508. }
  509. ci->i_xattrs.index_version = ci->i_xattrs.version;
  510. ci->i_xattrs.dirty = false;
  511. return err;
  512. bad_lock:
  513. spin_lock(&ci->i_ceph_lock);
  514. bad:
  515. if (xattrs) {
  516. for (i = 0; i < numattr; i++)
  517. kfree(xattrs[i]);
  518. kfree(xattrs);
  519. }
  520. ci->i_xattrs.names_size = 0;
  521. return err;
  522. }
  523. static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
  524. int val_size)
  525. {
  526. /*
  527. * 4 bytes for the length, and additional 4 bytes per each xattr name,
  528. * 4 bytes per each value
  529. */
  530. int size = 4 + ci->i_xattrs.count*(4 + 4) +
  531. ci->i_xattrs.names_size +
  532. ci->i_xattrs.vals_size;
  533. dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
  534. ci->i_xattrs.count, ci->i_xattrs.names_size,
  535. ci->i_xattrs.vals_size);
  536. if (name_size)
  537. size += 4 + 4 + name_size + val_size;
  538. return size;
  539. }
  540. /*
  541. * If there are dirty xattrs, reencode xattrs into the prealloc_blob
  542. * and swap into place.
  543. */
  544. void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
  545. {
  546. struct rb_node *p;
  547. struct ceph_inode_xattr *xattr = NULL;
  548. void *dest;
  549. dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
  550. if (ci->i_xattrs.dirty) {
  551. int need = __get_required_blob_size(ci, 0, 0);
  552. BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
  553. p = rb_first(&ci->i_xattrs.index);
  554. dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
  555. ceph_encode_32(&dest, ci->i_xattrs.count);
  556. while (p) {
  557. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  558. ceph_encode_32(&dest, xattr->name_len);
  559. memcpy(dest, xattr->name, xattr->name_len);
  560. dest += xattr->name_len;
  561. ceph_encode_32(&dest, xattr->val_len);
  562. memcpy(dest, xattr->val, xattr->val_len);
  563. dest += xattr->val_len;
  564. p = rb_next(p);
  565. }
  566. /* adjust buffer len; it may be larger than we need */
  567. ci->i_xattrs.prealloc_blob->vec.iov_len =
  568. dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
  569. if (ci->i_xattrs.blob)
  570. ceph_buffer_put(ci->i_xattrs.blob);
  571. ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
  572. ci->i_xattrs.prealloc_blob = NULL;
  573. ci->i_xattrs.dirty = false;
  574. ci->i_xattrs.version++;
  575. }
  576. }
  577. ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
  578. size_t size)
  579. {
  580. struct ceph_inode_info *ci = ceph_inode(inode);
  581. int err;
  582. struct ceph_inode_xattr *xattr;
  583. struct ceph_vxattr *vxattr = NULL;
  584. if (!ceph_is_valid_xattr(name))
  585. return -ENODATA;
  586. /* let's see if a virtual xattr was requested */
  587. vxattr = ceph_match_vxattr(inode, name);
  588. if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
  589. err = vxattr->getxattr_cb(ci, value, size);
  590. return err;
  591. }
  592. spin_lock(&ci->i_ceph_lock);
  593. dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
  594. ci->i_xattrs.version, ci->i_xattrs.index_version);
  595. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  596. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  597. goto get_xattr;
  598. } else {
  599. spin_unlock(&ci->i_ceph_lock);
  600. /* get xattrs from mds (if we don't already have them) */
  601. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  602. if (err)
  603. return err;
  604. }
  605. spin_lock(&ci->i_ceph_lock);
  606. err = __build_xattrs(inode);
  607. if (err < 0)
  608. goto out;
  609. get_xattr:
  610. err = -ENODATA; /* == ENOATTR */
  611. xattr = __get_xattr(ci, name);
  612. if (!xattr)
  613. goto out;
  614. err = -ERANGE;
  615. if (size && size < xattr->val_len)
  616. goto out;
  617. err = xattr->val_len;
  618. if (size == 0)
  619. goto out;
  620. memcpy(value, xattr->val, xattr->val_len);
  621. out:
  622. spin_unlock(&ci->i_ceph_lock);
  623. return err;
  624. }
  625. ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
  626. size_t size)
  627. {
  628. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  629. return generic_getxattr(dentry, name, value, size);
  630. return __ceph_getxattr(dentry->d_inode, name, value, size);
  631. }
  632. ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  633. {
  634. struct inode *inode = dentry->d_inode;
  635. struct ceph_inode_info *ci = ceph_inode(inode);
  636. struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
  637. u32 vir_namelen = 0;
  638. u32 namelen;
  639. int err;
  640. u32 len;
  641. int i;
  642. spin_lock(&ci->i_ceph_lock);
  643. dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
  644. ci->i_xattrs.version, ci->i_xattrs.index_version);
  645. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  646. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  647. goto list_xattr;
  648. } else {
  649. spin_unlock(&ci->i_ceph_lock);
  650. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  651. if (err)
  652. return err;
  653. }
  654. spin_lock(&ci->i_ceph_lock);
  655. err = __build_xattrs(inode);
  656. if (err < 0)
  657. goto out;
  658. list_xattr:
  659. /*
  660. * Start with virtual dir xattr names (if any) (including
  661. * terminating '\0' characters for each).
  662. */
  663. vir_namelen = ceph_vxattrs_name_size(vxattrs);
  664. /* adding 1 byte per each variable due to the null termination */
  665. namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
  666. err = -ERANGE;
  667. if (size && vir_namelen + namelen > size)
  668. goto out;
  669. err = namelen + vir_namelen;
  670. if (size == 0)
  671. goto out;
  672. names = __copy_xattr_names(ci, names);
  673. /* virtual xattr names, too */
  674. err = namelen;
  675. if (vxattrs) {
  676. for (i = 0; vxattrs[i].name; i++) {
  677. if (!vxattrs[i].hidden &&
  678. !(vxattrs[i].exists_cb &&
  679. !vxattrs[i].exists_cb(ci))) {
  680. len = sprintf(names, "%s", vxattrs[i].name);
  681. names += len + 1;
  682. err += len + 1;
  683. }
  684. }
  685. }
  686. out:
  687. spin_unlock(&ci->i_ceph_lock);
  688. return err;
  689. }
  690. static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
  691. const char *value, size_t size, int flags)
  692. {
  693. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  694. struct inode *inode = dentry->d_inode;
  695. struct ceph_inode_info *ci = ceph_inode(inode);
  696. struct inode *parent_inode;
  697. struct ceph_mds_request *req;
  698. struct ceph_mds_client *mdsc = fsc->mdsc;
  699. int err;
  700. int i, nr_pages;
  701. struct page **pages = NULL;
  702. void *kaddr;
  703. /* copy value into some pages */
  704. nr_pages = calc_pages_for(0, size);
  705. if (nr_pages) {
  706. pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
  707. if (!pages)
  708. return -ENOMEM;
  709. err = -ENOMEM;
  710. for (i = 0; i < nr_pages; i++) {
  711. pages[i] = __page_cache_alloc(GFP_NOFS);
  712. if (!pages[i]) {
  713. nr_pages = i;
  714. goto out;
  715. }
  716. kaddr = kmap(pages[i]);
  717. memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
  718. min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
  719. }
  720. }
  721. dout("setxattr value=%.*s\n", (int)size, value);
  722. /* do request */
  723. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
  724. USE_AUTH_MDS);
  725. if (IS_ERR(req)) {
  726. err = PTR_ERR(req);
  727. goto out;
  728. }
  729. req->r_inode = inode;
  730. ihold(inode);
  731. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  732. req->r_num_caps = 1;
  733. req->r_args.setxattr.flags = cpu_to_le32(flags);
  734. req->r_path2 = kstrdup(name, GFP_NOFS);
  735. req->r_pages = pages;
  736. req->r_num_pages = nr_pages;
  737. req->r_data_len = size;
  738. dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
  739. parent_inode = ceph_get_dentry_parent_inode(dentry);
  740. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  741. iput(parent_inode);
  742. ceph_mdsc_put_request(req);
  743. dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
  744. out:
  745. if (pages) {
  746. for (i = 0; i < nr_pages; i++)
  747. __free_page(pages[i]);
  748. kfree(pages);
  749. }
  750. return err;
  751. }
  752. int __ceph_setxattr(struct dentry *dentry, const char *name,
  753. const void *value, size_t size, int flags)
  754. {
  755. struct inode *inode = dentry->d_inode;
  756. struct ceph_vxattr *vxattr;
  757. struct ceph_inode_info *ci = ceph_inode(inode);
  758. int issued;
  759. int err;
  760. int dirty;
  761. int name_len = strlen(name);
  762. int val_len = size;
  763. char *newname = NULL;
  764. char *newval = NULL;
  765. struct ceph_inode_xattr *xattr = NULL;
  766. int required_blob_size;
  767. if (!ceph_is_valid_xattr(name))
  768. return -EOPNOTSUPP;
  769. vxattr = ceph_match_vxattr(inode, name);
  770. if (vxattr && vxattr->readonly)
  771. return -EOPNOTSUPP;
  772. /* pass any unhandled ceph.* xattrs through to the MDS */
  773. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  774. goto do_sync_unlocked;
  775. /* preallocate memory for xattr name, value, index node */
  776. err = -ENOMEM;
  777. newname = kmemdup(name, name_len + 1, GFP_NOFS);
  778. if (!newname)
  779. goto out;
  780. if (val_len) {
  781. newval = kmemdup(value, val_len, GFP_NOFS);
  782. if (!newval)
  783. goto out;
  784. }
  785. xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
  786. if (!xattr)
  787. goto out;
  788. spin_lock(&ci->i_ceph_lock);
  789. retry:
  790. issued = __ceph_caps_issued(ci, NULL);
  791. dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
  792. if (!(issued & CEPH_CAP_XATTR_EXCL))
  793. goto do_sync;
  794. __build_xattrs(inode);
  795. required_blob_size = __get_required_blob_size(ci, name_len, val_len);
  796. if (!ci->i_xattrs.prealloc_blob ||
  797. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  798. struct ceph_buffer *blob;
  799. spin_unlock(&ci->i_ceph_lock);
  800. dout(" preaallocating new blob size=%d\n", required_blob_size);
  801. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  802. if (!blob)
  803. goto out;
  804. spin_lock(&ci->i_ceph_lock);
  805. if (ci->i_xattrs.prealloc_blob)
  806. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  807. ci->i_xattrs.prealloc_blob = blob;
  808. goto retry;
  809. }
  810. err = __set_xattr(ci, newname, name_len, newval,
  811. val_len, 1, 1, 1, &xattr);
  812. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
  813. ci->i_xattrs.dirty = true;
  814. inode->i_ctime = CURRENT_TIME;
  815. spin_unlock(&ci->i_ceph_lock);
  816. if (dirty)
  817. __mark_inode_dirty(inode, dirty);
  818. return err;
  819. do_sync:
  820. spin_unlock(&ci->i_ceph_lock);
  821. do_sync_unlocked:
  822. err = ceph_sync_setxattr(dentry, name, value, size, flags);
  823. out:
  824. kfree(newname);
  825. kfree(newval);
  826. kfree(xattr);
  827. return err;
  828. }
  829. int ceph_setxattr(struct dentry *dentry, const char *name,
  830. const void *value, size_t size, int flags)
  831. {
  832. if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
  833. return -EROFS;
  834. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  835. return generic_setxattr(dentry, name, value, size, flags);
  836. return __ceph_setxattr(dentry, name, value, size, flags);
  837. }
  838. static int ceph_send_removexattr(struct dentry *dentry, const char *name)
  839. {
  840. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  841. struct ceph_mds_client *mdsc = fsc->mdsc;
  842. struct inode *inode = dentry->d_inode;
  843. struct inode *parent_inode;
  844. struct ceph_mds_request *req;
  845. int err;
  846. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
  847. USE_AUTH_MDS);
  848. if (IS_ERR(req))
  849. return PTR_ERR(req);
  850. req->r_inode = inode;
  851. ihold(inode);
  852. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  853. req->r_num_caps = 1;
  854. req->r_path2 = kstrdup(name, GFP_NOFS);
  855. parent_inode = ceph_get_dentry_parent_inode(dentry);
  856. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  857. iput(parent_inode);
  858. ceph_mdsc_put_request(req);
  859. return err;
  860. }
  861. int __ceph_removexattr(struct dentry *dentry, const char *name)
  862. {
  863. struct inode *inode = dentry->d_inode;
  864. struct ceph_vxattr *vxattr;
  865. struct ceph_inode_info *ci = ceph_inode(inode);
  866. int issued;
  867. int err;
  868. int required_blob_size;
  869. int dirty;
  870. if (!ceph_is_valid_xattr(name))
  871. return -EOPNOTSUPP;
  872. vxattr = ceph_match_vxattr(inode, name);
  873. if (vxattr && vxattr->readonly)
  874. return -EOPNOTSUPP;
  875. /* pass any unhandled ceph.* xattrs through to the MDS */
  876. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  877. goto do_sync_unlocked;
  878. err = -ENOMEM;
  879. spin_lock(&ci->i_ceph_lock);
  880. retry:
  881. issued = __ceph_caps_issued(ci, NULL);
  882. dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
  883. if (!(issued & CEPH_CAP_XATTR_EXCL))
  884. goto do_sync;
  885. __build_xattrs(inode);
  886. required_blob_size = __get_required_blob_size(ci, 0, 0);
  887. if (!ci->i_xattrs.prealloc_blob ||
  888. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  889. struct ceph_buffer *blob;
  890. spin_unlock(&ci->i_ceph_lock);
  891. dout(" preaallocating new blob size=%d\n", required_blob_size);
  892. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  893. if (!blob)
  894. goto out;
  895. spin_lock(&ci->i_ceph_lock);
  896. if (ci->i_xattrs.prealloc_blob)
  897. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  898. ci->i_xattrs.prealloc_blob = blob;
  899. goto retry;
  900. }
  901. err = __remove_xattr_by_name(ceph_inode(inode), name);
  902. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
  903. ci->i_xattrs.dirty = true;
  904. inode->i_ctime = CURRENT_TIME;
  905. spin_unlock(&ci->i_ceph_lock);
  906. if (dirty)
  907. __mark_inode_dirty(inode, dirty);
  908. return err;
  909. do_sync:
  910. spin_unlock(&ci->i_ceph_lock);
  911. do_sync_unlocked:
  912. err = ceph_send_removexattr(dentry, name);
  913. out:
  914. return err;
  915. }
  916. int ceph_removexattr(struct dentry *dentry, const char *name)
  917. {
  918. if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
  919. return -EROFS;
  920. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  921. return generic_removexattr(dentry, name);
  922. return __ceph_removexattr(dentry, name);
  923. }