xattr.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118
  1. #include <linux/ceph/ceph_debug.h>
  2. #include "super.h"
  3. #include "mds_client.h"
  4. #include <linux/ceph/decode.h>
  5. #include <linux/xattr.h>
  6. #include <linux/posix_acl_xattr.h>
  7. #include <linux/slab.h>
/* Name prefix shared by every ceph.* extended attribute. */
#define XATTR_CEPH_PREFIX "ceph."
/* Length of that prefix, not counting the terminating '\0'. */
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)

/* Forward declaration: __set_xattr() calls this before it is defined. */
static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr);
/*
 * List of handlers for synthetic system.* attributes.  Other
 * attributes are handled directly.
 *
 * The POSIX ACL handlers are only present when CONFIG_CEPH_FS_POSIX_ACL
 * is defined; the array is always NULL-terminated.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
	NULL,
};
  23. static bool ceph_is_valid_xattr(const char *name)
  24. {
  25. return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
  26. !strncmp(name, XATTR_SECURITY_PREFIX,
  27. XATTR_SECURITY_PREFIX_LEN) ||
  28. !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
  29. !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
  30. !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
  31. }
  32. /*
  33. * These define virtual xattrs exposing the recursive directory
  34. * statistics and layout metadata.
  35. */
  36. struct ceph_vxattr {
  37. char *name;
  38. size_t name_size; /* strlen(name) + 1 (for '\0') */
  39. size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
  40. size_t size);
  41. bool readonly, hidden;
  42. bool (*exists_cb)(struct ceph_inode_info *ci);
  43. };
  44. /* layouts */
  45. static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
  46. {
  47. size_t s;
  48. char *p = (char *)&ci->i_layout;
  49. for (s = 0; s < sizeof(ci->i_layout); s++, p++)
  50. if (*p)
  51. return true;
  52. return false;
  53. }
  54. static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
  55. size_t size)
  56. {
  57. int ret;
  58. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  59. struct ceph_osd_client *osdc = &fsc->client->osdc;
  60. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  61. const char *pool_name;
  62. dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
  63. down_read(&osdc->map_sem);
  64. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  65. if (pool_name)
  66. ret = snprintf(val, size,
  67. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
  68. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  69. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  70. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  71. pool_name);
  72. else
  73. ret = snprintf(val, size,
  74. "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
  75. (unsigned long long)ceph_file_layout_su(ci->i_layout),
  76. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout),
  77. (unsigned long long)ceph_file_layout_object_size(ci->i_layout),
  78. (unsigned long long)pool);
  79. up_read(&osdc->map_sem);
  80. return ret;
  81. }
  82. static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
  83. char *val, size_t size)
  84. {
  85. return snprintf(val, size, "%lld",
  86. (unsigned long long)ceph_file_layout_su(ci->i_layout));
  87. }
  88. static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
  89. char *val, size_t size)
  90. {
  91. return snprintf(val, size, "%lld",
  92. (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout));
  93. }
  94. static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
  95. char *val, size_t size)
  96. {
  97. return snprintf(val, size, "%lld",
  98. (unsigned long long)ceph_file_layout_object_size(ci->i_layout));
  99. }
  100. static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
  101. char *val, size_t size)
  102. {
  103. int ret;
  104. struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
  105. struct ceph_osd_client *osdc = &fsc->client->osdc;
  106. s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
  107. const char *pool_name;
  108. down_read(&osdc->map_sem);
  109. pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
  110. if (pool_name)
  111. ret = snprintf(val, size, "%s", pool_name);
  112. else
  113. ret = snprintf(val, size, "%lld", (unsigned long long)pool);
  114. up_read(&osdc->map_sem);
  115. return ret;
  116. }
  117. /* directories */
  118. static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
  119. size_t size)
  120. {
  121. return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
  122. }
  123. static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
  124. size_t size)
  125. {
  126. return snprintf(val, size, "%lld", ci->i_files);
  127. }
  128. static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
  129. size_t size)
  130. {
  131. return snprintf(val, size, "%lld", ci->i_subdirs);
  132. }
  133. static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
  134. size_t size)
  135. {
  136. return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
  137. }
  138. static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
  139. size_t size)
  140. {
  141. return snprintf(val, size, "%lld", ci->i_rfiles);
  142. }
  143. static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
  144. size_t size)
  145. {
  146. return snprintf(val, size, "%lld", ci->i_rsubdirs);
  147. }
  148. static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
  149. size_t size)
  150. {
  151. return snprintf(val, size, "%lld", ci->i_rbytes);
  152. }
  153. static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
  154. size_t size)
  155. {
  156. return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
  157. (long)ci->i_rctime.tv_nsec);
  158. }
/* Build the string "ceph.<_type>.<_name>" by literal concatenation. */
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
/* Build the string "ceph.<_type>.<_name>.<_name2>". */
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for the vxattr named ceph.<_type>.<_name>, served by
 * ceph_vxattrcb_<_type>_<_name>().  The entry is read-only, not hidden
 * and has no exists_cb.
 */
#define XATTR_NAME_CEPH(_type, _name) \
	{ \
		.name = CEPH_XATTR_NAME(_type, _name), \
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
		.readonly = true, \
		.hidden = false, \
		.exists_cb = NULL, \
	}
/*
 * Table entry for one field of a layout vxattr, named
 * ceph.<_type>.<_name>.<_field> and served by
 * ceph_vxattrcb_<_name>_<_field>().  The entry is not read-only, is
 * hidden, and uses ceph_vxattrcb_layout_exists() as its exists_cb.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field) \
	{ \
		.name = CEPH_XATTR_NAME2(_type, _name, _field), \
		.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
		.readonly = false, \
		.hidden = true, \
		.exists_cb = ceph_vxattrcb_layout_exists, \
	}
/*
 * Virtual xattrs offered on directories: the whole layout, its
 * individual (hidden) fields, and the directory statistics.
 */
static struct ceph_vxattr ceph_dir_vxattrs[] = {
	{
		.name = "ceph.dir.layout",
		.name_size = sizeof("ceph.dir.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = false,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
	XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
	XATTR_LAYOUT_FIELD(dir, layout, object_size),
	XATTR_LAYOUT_FIELD(dir, layout, pool),
	XATTR_NAME_CEPH(dir, entries),
	XATTR_NAME_CEPH(dir, files),
	XATTR_NAME_CEPH(dir, subdirs),
	XATTR_NAME_CEPH(dir, rentries),
	XATTR_NAME_CEPH(dir, rfiles),
	XATTR_NAME_CEPH(dir, rsubdirs),
	XATTR_NAME_CEPH(dir, rbytes),
	XATTR_NAME_CEPH(dir, rctime),
	{ .name = NULL, 0 } /* Required table terminator */
};
/* total size of all non-hidden names above; set by ceph_xattr_init() */
static size_t ceph_dir_vxattrs_name_size; /* total size of all names */
  204. /* files */
/*
 * Virtual xattrs offered on regular files: the whole layout and its
 * individual (hidden) fields.
 */
static struct ceph_vxattr ceph_file_vxattrs[] = {
	{
		.name = "ceph.file.layout",
		.name_size = sizeof("ceph.file.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = false,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
	XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(file, layout, stripe_count),
	XATTR_LAYOUT_FIELD(file, layout, object_size),
	XATTR_LAYOUT_FIELD(file, layout, pool),
	{ .name = NULL, 0 } /* Required table terminator */
};
/* total size of all non-hidden names above; set by ceph_xattr_init() */
static size_t ceph_file_vxattrs_name_size; /* total size of all names */
  221. static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
  222. {
  223. if (S_ISDIR(inode->i_mode))
  224. return ceph_dir_vxattrs;
  225. else if (S_ISREG(inode->i_mode))
  226. return ceph_file_vxattrs;
  227. return NULL;
  228. }
  229. static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
  230. {
  231. if (vxattrs == ceph_dir_vxattrs)
  232. return ceph_dir_vxattrs_name_size;
  233. if (vxattrs == ceph_file_vxattrs)
  234. return ceph_file_vxattrs_name_size;
  235. BUG();
  236. return 0;
  237. }
  238. /*
  239. * Compute the aggregate size (including terminating '\0') of all
  240. * virtual extended attribute names in the given vxattr table.
  241. */
  242. static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
  243. {
  244. struct ceph_vxattr *vxattr;
  245. size_t size = 0;
  246. for (vxattr = vxattrs; vxattr->name; vxattr++)
  247. if (!vxattr->hidden)
  248. size += vxattr->name_size;
  249. return size;
  250. }
  251. /* Routines called at initialization and exit time */
  252. void __init ceph_xattr_init(void)
  253. {
  254. ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
  255. ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
  256. }
  257. void ceph_xattr_exit(void)
  258. {
  259. ceph_dir_vxattrs_name_size = 0;
  260. ceph_file_vxattrs_name_size = 0;
  261. }
  262. static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
  263. const char *name)
  264. {
  265. struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
  266. if (vxattr) {
  267. while (vxattr->name) {
  268. if (!strcmp(vxattr->name, name))
  269. return vxattr;
  270. vxattr++;
  271. }
  272. }
  273. return NULL;
  274. }
  275. static int __set_xattr(struct ceph_inode_info *ci,
  276. const char *name, int name_len,
  277. const char *val, int val_len,
  278. int flags, int update_xattr,
  279. struct ceph_inode_xattr **newxattr)
  280. {
  281. struct rb_node **p;
  282. struct rb_node *parent = NULL;
  283. struct ceph_inode_xattr *xattr = NULL;
  284. int c;
  285. int new = 0;
  286. p = &ci->i_xattrs.index.rb_node;
  287. while (*p) {
  288. parent = *p;
  289. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  290. c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
  291. if (c < 0)
  292. p = &(*p)->rb_left;
  293. else if (c > 0)
  294. p = &(*p)->rb_right;
  295. else {
  296. if (name_len == xattr->name_len)
  297. break;
  298. else if (name_len < xattr->name_len)
  299. p = &(*p)->rb_left;
  300. else
  301. p = &(*p)->rb_right;
  302. }
  303. xattr = NULL;
  304. }
  305. if (update_xattr) {
  306. int err = 0;
  307. if (xattr && (flags & XATTR_CREATE))
  308. err = -EEXIST;
  309. else if (!xattr && (flags & XATTR_REPLACE))
  310. err = -ENODATA;
  311. if (err) {
  312. kfree(name);
  313. kfree(val);
  314. return err;
  315. }
  316. if (update_xattr < 0) {
  317. if (xattr)
  318. __remove_xattr(ci, xattr);
  319. kfree(name);
  320. return 0;
  321. }
  322. }
  323. if (!xattr) {
  324. new = 1;
  325. xattr = *newxattr;
  326. xattr->name = name;
  327. xattr->name_len = name_len;
  328. xattr->should_free_name = update_xattr;
  329. ci->i_xattrs.count++;
  330. dout("__set_xattr count=%d\n", ci->i_xattrs.count);
  331. } else {
  332. kfree(*newxattr);
  333. *newxattr = NULL;
  334. if (xattr->should_free_val)
  335. kfree((void *)xattr->val);
  336. if (update_xattr) {
  337. kfree((void *)name);
  338. name = xattr->name;
  339. }
  340. ci->i_xattrs.names_size -= xattr->name_len;
  341. ci->i_xattrs.vals_size -= xattr->val_len;
  342. }
  343. ci->i_xattrs.names_size += name_len;
  344. ci->i_xattrs.vals_size += val_len;
  345. if (val)
  346. xattr->val = val;
  347. else
  348. xattr->val = "";
  349. xattr->val_len = val_len;
  350. xattr->dirty = update_xattr;
  351. xattr->should_free_val = (val && update_xattr);
  352. if (new) {
  353. rb_link_node(&xattr->node, parent, p);
  354. rb_insert_color(&xattr->node, &ci->i_xattrs.index);
  355. dout("__set_xattr_val p=%p\n", p);
  356. }
  357. dout("__set_xattr_val added %llx.%llx xattr %p %s=%.*s\n",
  358. ceph_vinop(&ci->vfs_inode), xattr, name, val_len, val);
  359. return 0;
  360. }
  361. static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
  362. const char *name)
  363. {
  364. struct rb_node **p;
  365. struct rb_node *parent = NULL;
  366. struct ceph_inode_xattr *xattr = NULL;
  367. int name_len = strlen(name);
  368. int c;
  369. p = &ci->i_xattrs.index.rb_node;
  370. while (*p) {
  371. parent = *p;
  372. xattr = rb_entry(parent, struct ceph_inode_xattr, node);
  373. c = strncmp(name, xattr->name, xattr->name_len);
  374. if (c == 0 && name_len > xattr->name_len)
  375. c = 1;
  376. if (c < 0)
  377. p = &(*p)->rb_left;
  378. else if (c > 0)
  379. p = &(*p)->rb_right;
  380. else {
  381. dout("__get_xattr %s: found %.*s\n", name,
  382. xattr->val_len, xattr->val);
  383. return xattr;
  384. }
  385. }
  386. dout("__get_xattr %s: not found\n", name);
  387. return NULL;
  388. }
  389. static void __free_xattr(struct ceph_inode_xattr *xattr)
  390. {
  391. BUG_ON(!xattr);
  392. if (xattr->should_free_name)
  393. kfree((void *)xattr->name);
  394. if (xattr->should_free_val)
  395. kfree((void *)xattr->val);
  396. kfree(xattr);
  397. }
  398. static int __remove_xattr(struct ceph_inode_info *ci,
  399. struct ceph_inode_xattr *xattr)
  400. {
  401. if (!xattr)
  402. return -ENODATA;
  403. rb_erase(&xattr->node, &ci->i_xattrs.index);
  404. if (xattr->should_free_name)
  405. kfree((void *)xattr->name);
  406. if (xattr->should_free_val)
  407. kfree((void *)xattr->val);
  408. ci->i_xattrs.names_size -= xattr->name_len;
  409. ci->i_xattrs.vals_size -= xattr->val_len;
  410. ci->i_xattrs.count--;
  411. kfree(xattr);
  412. return 0;
  413. }
  414. static int __remove_xattr_by_name(struct ceph_inode_info *ci,
  415. const char *name)
  416. {
  417. struct rb_node **p;
  418. struct ceph_inode_xattr *xattr;
  419. int err;
  420. p = &ci->i_xattrs.index.rb_node;
  421. xattr = __get_xattr(ci, name);
  422. err = __remove_xattr(ci, xattr);
  423. return err;
  424. }
  425. static char *__copy_xattr_names(struct ceph_inode_info *ci,
  426. char *dest)
  427. {
  428. struct rb_node *p;
  429. struct ceph_inode_xattr *xattr = NULL;
  430. p = rb_first(&ci->i_xattrs.index);
  431. dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
  432. while (p) {
  433. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  434. memcpy(dest, xattr->name, xattr->name_len);
  435. dest[xattr->name_len] = '\0';
  436. dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
  437. xattr->name_len, ci->i_xattrs.names_size);
  438. dest += xattr->name_len + 1;
  439. p = rb_next(p);
  440. }
  441. return dest;
  442. }
  443. void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
  444. {
  445. struct rb_node *p, *tmp;
  446. struct ceph_inode_xattr *xattr = NULL;
  447. p = rb_first(&ci->i_xattrs.index);
  448. dout("__ceph_destroy_xattrs p=%p\n", p);
  449. while (p) {
  450. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  451. tmp = p;
  452. p = rb_next(tmp);
  453. dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
  454. xattr->name_len, xattr->name);
  455. rb_erase(tmp, &ci->i_xattrs.index);
  456. __free_xattr(xattr);
  457. }
  458. ci->i_xattrs.names_size = 0;
  459. ci->i_xattrs.vals_size = 0;
  460. ci->i_xattrs.index_version = 0;
  461. ci->i_xattrs.count = 0;
  462. ci->i_xattrs.index = RB_ROOT;
  463. }
  464. static int __build_xattrs(struct inode *inode)
  465. __releases(ci->i_ceph_lock)
  466. __acquires(ci->i_ceph_lock)
  467. {
  468. u32 namelen;
  469. u32 numattr = 0;
  470. void *p, *end;
  471. u32 len;
  472. const char *name, *val;
  473. struct ceph_inode_info *ci = ceph_inode(inode);
  474. int xattr_version;
  475. struct ceph_inode_xattr **xattrs = NULL;
  476. int err = 0;
  477. int i;
  478. dout("__build_xattrs() len=%d\n",
  479. ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
  480. if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
  481. return 0; /* already built */
  482. __ceph_destroy_xattrs(ci);
  483. start:
  484. /* updated internal xattr rb tree */
  485. if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
  486. p = ci->i_xattrs.blob->vec.iov_base;
  487. end = p + ci->i_xattrs.blob->vec.iov_len;
  488. ceph_decode_32_safe(&p, end, numattr, bad);
  489. xattr_version = ci->i_xattrs.version;
  490. spin_unlock(&ci->i_ceph_lock);
  491. xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
  492. GFP_NOFS);
  493. err = -ENOMEM;
  494. if (!xattrs)
  495. goto bad_lock;
  496. memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
  497. for (i = 0; i < numattr; i++) {
  498. xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
  499. GFP_NOFS);
  500. if (!xattrs[i])
  501. goto bad_lock;
  502. }
  503. spin_lock(&ci->i_ceph_lock);
  504. if (ci->i_xattrs.version != xattr_version) {
  505. /* lost a race, retry */
  506. for (i = 0; i < numattr; i++)
  507. kfree(xattrs[i]);
  508. kfree(xattrs);
  509. xattrs = NULL;
  510. goto start;
  511. }
  512. err = -EIO;
  513. while (numattr--) {
  514. ceph_decode_32_safe(&p, end, len, bad);
  515. namelen = len;
  516. name = p;
  517. p += len;
  518. ceph_decode_32_safe(&p, end, len, bad);
  519. val = p;
  520. p += len;
  521. err = __set_xattr(ci, name, namelen, val, len,
  522. 0, 0, &xattrs[numattr]);
  523. if (err < 0)
  524. goto bad;
  525. }
  526. kfree(xattrs);
  527. }
  528. ci->i_xattrs.index_version = ci->i_xattrs.version;
  529. ci->i_xattrs.dirty = false;
  530. return err;
  531. bad_lock:
  532. spin_lock(&ci->i_ceph_lock);
  533. bad:
  534. if (xattrs) {
  535. for (i = 0; i < numattr; i++)
  536. kfree(xattrs[i]);
  537. kfree(xattrs);
  538. }
  539. ci->i_xattrs.names_size = 0;
  540. return err;
  541. }
  542. static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
  543. int val_size)
  544. {
  545. /*
  546. * 4 bytes for the length, and additional 4 bytes per each xattr name,
  547. * 4 bytes per each value
  548. */
  549. int size = 4 + ci->i_xattrs.count*(4 + 4) +
  550. ci->i_xattrs.names_size +
  551. ci->i_xattrs.vals_size;
  552. dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
  553. ci->i_xattrs.count, ci->i_xattrs.names_size,
  554. ci->i_xattrs.vals_size);
  555. if (name_size)
  556. size += 4 + 4 + name_size + val_size;
  557. return size;
  558. }
  559. /*
  560. * If there are dirty xattrs, reencode xattrs into the prealloc_blob
  561. * and swap into place.
  562. */
  563. void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
  564. {
  565. struct rb_node *p;
  566. struct ceph_inode_xattr *xattr = NULL;
  567. void *dest;
  568. dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
  569. if (ci->i_xattrs.dirty) {
  570. int need = __get_required_blob_size(ci, 0, 0);
  571. BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
  572. p = rb_first(&ci->i_xattrs.index);
  573. dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
  574. ceph_encode_32(&dest, ci->i_xattrs.count);
  575. while (p) {
  576. xattr = rb_entry(p, struct ceph_inode_xattr, node);
  577. ceph_encode_32(&dest, xattr->name_len);
  578. memcpy(dest, xattr->name, xattr->name_len);
  579. dest += xattr->name_len;
  580. ceph_encode_32(&dest, xattr->val_len);
  581. memcpy(dest, xattr->val, xattr->val_len);
  582. dest += xattr->val_len;
  583. p = rb_next(p);
  584. }
  585. /* adjust buffer len; it may be larger than we need */
  586. ci->i_xattrs.prealloc_blob->vec.iov_len =
  587. dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
  588. if (ci->i_xattrs.blob)
  589. ceph_buffer_put(ci->i_xattrs.blob);
  590. ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
  591. ci->i_xattrs.prealloc_blob = NULL;
  592. ci->i_xattrs.dirty = false;
  593. ci->i_xattrs.version++;
  594. }
  595. }
  596. ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
  597. size_t size)
  598. {
  599. struct ceph_inode_info *ci = ceph_inode(inode);
  600. int err;
  601. struct ceph_inode_xattr *xattr;
  602. struct ceph_vxattr *vxattr = NULL;
  603. if (!ceph_is_valid_xattr(name))
  604. return -ENODATA;
  605. /* let's see if a virtual xattr was requested */
  606. vxattr = ceph_match_vxattr(inode, name);
  607. if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
  608. err = vxattr->getxattr_cb(ci, value, size);
  609. return err;
  610. }
  611. spin_lock(&ci->i_ceph_lock);
  612. dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
  613. ci->i_xattrs.version, ci->i_xattrs.index_version);
  614. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  615. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  616. goto get_xattr;
  617. } else {
  618. spin_unlock(&ci->i_ceph_lock);
  619. /* get xattrs from mds (if we don't already have them) */
  620. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  621. if (err)
  622. return err;
  623. }
  624. spin_lock(&ci->i_ceph_lock);
  625. err = __build_xattrs(inode);
  626. if (err < 0)
  627. goto out;
  628. get_xattr:
  629. err = -ENODATA; /* == ENOATTR */
  630. xattr = __get_xattr(ci, name);
  631. if (!xattr)
  632. goto out;
  633. err = -ERANGE;
  634. if (size && size < xattr->val_len)
  635. goto out;
  636. err = xattr->val_len;
  637. if (size == 0)
  638. goto out;
  639. memcpy(value, xattr->val, xattr->val_len);
  640. out:
  641. spin_unlock(&ci->i_ceph_lock);
  642. return err;
  643. }
  644. ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
  645. size_t size)
  646. {
  647. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  648. return generic_getxattr(dentry, name, value, size);
  649. return __ceph_getxattr(dentry->d_inode, name, value, size);
  650. }
  651. ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  652. {
  653. struct inode *inode = dentry->d_inode;
  654. struct ceph_inode_info *ci = ceph_inode(inode);
  655. struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
  656. u32 vir_namelen = 0;
  657. u32 namelen;
  658. int err;
  659. u32 len;
  660. int i;
  661. spin_lock(&ci->i_ceph_lock);
  662. dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
  663. ci->i_xattrs.version, ci->i_xattrs.index_version);
  664. if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
  665. (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
  666. goto list_xattr;
  667. } else {
  668. spin_unlock(&ci->i_ceph_lock);
  669. err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
  670. if (err)
  671. return err;
  672. }
  673. spin_lock(&ci->i_ceph_lock);
  674. err = __build_xattrs(inode);
  675. if (err < 0)
  676. goto out;
  677. list_xattr:
  678. /*
  679. * Start with virtual dir xattr names (if any) (including
  680. * terminating '\0' characters for each).
  681. */
  682. vir_namelen = ceph_vxattrs_name_size(vxattrs);
  683. /* adding 1 byte per each variable due to the null termination */
  684. namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
  685. err = -ERANGE;
  686. if (size && vir_namelen + namelen > size)
  687. goto out;
  688. err = namelen + vir_namelen;
  689. if (size == 0)
  690. goto out;
  691. names = __copy_xattr_names(ci, names);
  692. /* virtual xattr names, too */
  693. err = namelen;
  694. if (vxattrs) {
  695. for (i = 0; vxattrs[i].name; i++) {
  696. if (!vxattrs[i].hidden &&
  697. !(vxattrs[i].exists_cb &&
  698. !vxattrs[i].exists_cb(ci))) {
  699. len = sprintf(names, "%s", vxattrs[i].name);
  700. names += len + 1;
  701. err += len + 1;
  702. }
  703. }
  704. }
  705. out:
  706. spin_unlock(&ci->i_ceph_lock);
  707. return err;
  708. }
  709. static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
  710. const char *value, size_t size, int flags)
  711. {
  712. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  713. struct inode *inode = dentry->d_inode;
  714. struct ceph_inode_info *ci = ceph_inode(inode);
  715. struct inode *parent_inode;
  716. struct ceph_mds_request *req;
  717. struct ceph_mds_client *mdsc = fsc->mdsc;
  718. int err;
  719. int i, nr_pages;
  720. struct page **pages = NULL;
  721. void *kaddr;
  722. /* copy value into some pages */
  723. nr_pages = calc_pages_for(0, size);
  724. if (nr_pages) {
  725. pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
  726. if (!pages)
  727. return -ENOMEM;
  728. err = -ENOMEM;
  729. for (i = 0; i < nr_pages; i++) {
  730. pages[i] = __page_cache_alloc(GFP_NOFS);
  731. if (!pages[i]) {
  732. nr_pages = i;
  733. goto out;
  734. }
  735. kaddr = kmap(pages[i]);
  736. memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
  737. min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
  738. }
  739. }
  740. dout("setxattr value=%.*s\n", (int)size, value);
  741. if (!value)
  742. flags |= CEPH_XATTR_REMOVE;
  743. /* do request */
  744. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
  745. USE_AUTH_MDS);
  746. if (IS_ERR(req)) {
  747. err = PTR_ERR(req);
  748. goto out;
  749. }
  750. req->r_inode = inode;
  751. ihold(inode);
  752. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  753. req->r_num_caps = 1;
  754. req->r_args.setxattr.flags = cpu_to_le32(flags);
  755. req->r_path2 = kstrdup(name, GFP_NOFS);
  756. req->r_pages = pages;
  757. req->r_num_pages = nr_pages;
  758. req->r_data_len = size;
  759. dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
  760. parent_inode = ceph_get_dentry_parent_inode(dentry);
  761. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  762. iput(parent_inode);
  763. ceph_mdsc_put_request(req);
  764. dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
  765. out:
  766. if (pages) {
  767. for (i = 0; i < nr_pages; i++)
  768. __free_page(pages[i]);
  769. kfree(pages);
  770. }
  771. return err;
  772. }
/*
 * Set the extended attribute @name on @dentry's inode, or remove it
 * when @value is NULL.
 *
 * When CEPH_CAP_XATTR_EXCL is issued the change is applied to the local
 * xattr index and the caps are marked dirty.  Otherwise, and for every
 * name with the ceph. prefix, the change is sent to the MDS through
 * ceph_sync_setxattr().
 *
 * Returns 0 on success, -EOPNOTSUPP for an unsupported prefix or a
 * read-only virtual xattr, -ENOMEM on allocation failure, or the error
 * reported by __set_xattr() / ceph_sync_setxattr().
 */
int __ceph_setxattr(struct dentry *dentry, const char *name,
			const void *value, size_t size, int flags)
{
	struct inode *inode = dentry->d_inode;
	struct ceph_vxattr *vxattr;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int issued;
	int err;
	int dirty = 0;
	int name_len = strlen(name);
	int val_len = size;
	char *newname = NULL;
	char *newval = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int required_blob_size;

	if (!ceph_is_valid_xattr(name))
		return -EOPNOTSUPP;

	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr && vxattr->readonly)
		return -EOPNOTSUPP;

	/* pass any unhandled ceph.* xattrs through to the MDS */
	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
		goto do_sync_unlocked;

	/* preallocate memory for xattr name, value, index node */
	err = -ENOMEM;
	newname = kmemdup(name, name_len + 1, GFP_NOFS);
	if (!newname)
		goto out;

	if (val_len) {
		newval = kmemdup(value, val_len, GFP_NOFS);
		if (!newval)
			goto out;
	}

	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
	if (!xattr)
		goto out;

	spin_lock(&ci->i_ceph_lock);
retry:
	/* without the exclusive xattr cap the change must go to the MDS */
	issued = __ceph_caps_issued(ci, NULL);
	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
	if (!(issued & CEPH_CAP_XATTR_EXCL))
		goto do_sync;
	/* NOTE(review): the return value of __build_xattrs() is ignored here */
	__build_xattrs(inode);

	required_blob_size = __get_required_blob_size(ci, name_len, val_len);

	/*
	 * Make sure a preallocated blob large enough for the index plus
	 * this xattr exists.  The lock is dropped for the allocation, so
	 * the caps and the required size are re-checked from "retry".
	 */
	if (!ci->i_xattrs.prealloc_blob ||
	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
		struct ceph_buffer *blob;

		spin_unlock(&ci->i_ceph_lock);
		dout(" preaallocating new blob size=%d\n", required_blob_size);
		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
		if (!blob)
			goto out;	/* err is still -ENOMEM */
		spin_lock(&ci->i_ceph_lock);
		if (ci->i_xattrs.prealloc_blob)
			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
		ci->i_xattrs.prealloc_blob = blob;
		goto retry;
	}

	/*
	 * From here on newname, newval and xattr are handed to
	 * __set_xattr(), which is responsible for them; they must not be
	 * freed again on this path.  update_xattr is 1 for a set and -1
	 * for a removal (value == NULL).
	 */
	err = __set_xattr(ci, newname, name_len, newval, val_len,
			  flags, value ? 1 : -1, &xattr);

	if (!err) {
		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
		ci->i_xattrs.dirty = true;
		inode->i_ctime = CURRENT_TIME;
	}

	spin_unlock(&ci->i_ceph_lock);
	if (dirty)
		__mark_inode_dirty(inode, dirty);
	return err;

do_sync:
	spin_unlock(&ci->i_ceph_lock);
do_sync_unlocked:
	err = ceph_sync_setxattr(dentry, name, value, size, flags);
out:
	/* the preallocations were not consumed on these paths */
	kfree(newname);
	kfree(newval);
	kfree(xattr);
	return err;
}
  852. int ceph_setxattr(struct dentry *dentry, const char *name,
  853. const void *value, size_t size, int flags)
  854. {
  855. if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
  856. return -EROFS;
  857. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  858. return generic_setxattr(dentry, name, value, size, flags);
  859. return __ceph_setxattr(dentry, name, value, size, flags);
  860. }
  861. static int ceph_send_removexattr(struct dentry *dentry, const char *name)
  862. {
  863. struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
  864. struct ceph_mds_client *mdsc = fsc->mdsc;
  865. struct inode *inode = dentry->d_inode;
  866. struct inode *parent_inode;
  867. struct ceph_mds_request *req;
  868. int err;
  869. req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
  870. USE_AUTH_MDS);
  871. if (IS_ERR(req))
  872. return PTR_ERR(req);
  873. req->r_inode = inode;
  874. ihold(inode);
  875. req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
  876. req->r_num_caps = 1;
  877. req->r_path2 = kstrdup(name, GFP_NOFS);
  878. parent_inode = ceph_get_dentry_parent_inode(dentry);
  879. err = ceph_mdsc_do_request(mdsc, parent_inode, req);
  880. iput(parent_inode);
  881. ceph_mdsc_put_request(req);
  882. return err;
  883. }
  884. int __ceph_removexattr(struct dentry *dentry, const char *name)
  885. {
  886. struct inode *inode = dentry->d_inode;
  887. struct ceph_vxattr *vxattr;
  888. struct ceph_inode_info *ci = ceph_inode(inode);
  889. int issued;
  890. int err;
  891. int required_blob_size;
  892. int dirty;
  893. if (!ceph_is_valid_xattr(name))
  894. return -EOPNOTSUPP;
  895. vxattr = ceph_match_vxattr(inode, name);
  896. if (vxattr && vxattr->readonly)
  897. return -EOPNOTSUPP;
  898. /* pass any unhandled ceph.* xattrs through to the MDS */
  899. if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
  900. goto do_sync_unlocked;
  901. err = -ENOMEM;
  902. spin_lock(&ci->i_ceph_lock);
  903. retry:
  904. issued = __ceph_caps_issued(ci, NULL);
  905. dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
  906. if (!(issued & CEPH_CAP_XATTR_EXCL))
  907. goto do_sync;
  908. __build_xattrs(inode);
  909. required_blob_size = __get_required_blob_size(ci, 0, 0);
  910. if (!ci->i_xattrs.prealloc_blob ||
  911. required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
  912. struct ceph_buffer *blob;
  913. spin_unlock(&ci->i_ceph_lock);
  914. dout(" preaallocating new blob size=%d\n", required_blob_size);
  915. blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
  916. if (!blob)
  917. goto out;
  918. spin_lock(&ci->i_ceph_lock);
  919. if (ci->i_xattrs.prealloc_blob)
  920. ceph_buffer_put(ci->i_xattrs.prealloc_blob);
  921. ci->i_xattrs.prealloc_blob = blob;
  922. goto retry;
  923. }
  924. err = __remove_xattr_by_name(ceph_inode(inode), name);
  925. dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
  926. ci->i_xattrs.dirty = true;
  927. inode->i_ctime = CURRENT_TIME;
  928. spin_unlock(&ci->i_ceph_lock);
  929. if (dirty)
  930. __mark_inode_dirty(inode, dirty);
  931. return err;
  932. do_sync:
  933. spin_unlock(&ci->i_ceph_lock);
  934. do_sync_unlocked:
  935. err = ceph_send_removexattr(dentry, name);
  936. out:
  937. return err;
  938. }
  939. int ceph_removexattr(struct dentry *dentry, const char *name)
  940. {
  941. if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
  942. return -EROFS;
  943. if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
  944. return generic_removexattr(dentry, name);
  945. return __ceph_removexattr(dentry, name);
  946. }