xfs_inode_item.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include <linux/iversion.h>

kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */
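
/* Convert from the generic log item back to the containing inode log item. */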
static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
{
	return container_of(lip, struct xfs_inode_log_item, ili_item);
}
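
/*
 * Count the number of log iovecs and the amount of log space needed to log
 * the data fork of the inode, based on the fork's current format.
 */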
STATIC void
xfs_inode_item_data_fork_size(
	struct xfs_inode_log_item *iip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_inode	*ip = iip->ili_inode;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
		    ip->i_d.di_nextents > 0 &&
		    ip->i_df.if_bytes > 0) {
			/* worst case, doesn't subtract delalloc extents */
			*nbytes += XFS_IFORK_DSIZE(ip);
			*nvecs += 1;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
		    ip->i_df.if_broot_bytes > 0) {
			*nbytes += ip->i_df.if_broot_bytes;
			*nvecs += 1;
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
		    ip->i_df.if_bytes > 0) {
			*nbytes += roundup(ip->i_df.if_bytes, 4);
			*nvecs += 1;
		}
		break;
	case XFS_DINODE_FMT_DEV:
		break;
	default:
		ASSERT(0);
		break;
	}
}
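
/*
 * Count the number of log iovecs and the amount of log space needed to log
 * the attribute fork of the inode, based on the fork's current format.
 */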
STATIC void
xfs_inode_item_attr_fork_size(
	struct xfs_inode_log_item *iip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_inode	*ip = iip->ili_inode;

	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
		    ip->i_d.di_anextents > 0 &&
		    ip->i_afp->if_bytes > 0) {
			/* worst case, doesn't subtract unused space */
			*nbytes += XFS_IFORK_ASIZE(ip);
			*nvecs += 1;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
		    ip->i_afp->if_broot_bytes > 0) {
			*nbytes += ip->i_afp->if_broot_bytes;
			*nvecs += 1;
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
		    ip->i_afp->if_bytes > 0) {
			*nbytes += roundup(ip->i_afp->if_bytes, 4);
			*nvecs += 1;
		}
		break;
	default:
		ASSERT(0);
		break;
	}
}
/*
 * This returns the number of iovecs needed to log the given inode item.
 *
 * We need one iovec for the inode log format structure, one for the
 * inode core, and possibly one for the inode data/extents/b-tree root
 * and one for the inode attribute data/extents/b-tree root.
 */
STATIC void
xfs_inode_item_size(
	struct xfs_log_item	*lip,
	int			*nvecs,
	int			*nbytes)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	*nvecs += 2;
	*nbytes += sizeof(struct xfs_inode_log_format) +
		   xfs_log_dinode_size(ip->i_d.di_version);

	xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
	if (XFS_IFORK_Q(ip))
		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);
}
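
/*
 * Format the data fork into the log vector.  Clear the logging flags that do
 * not apply to the current fork format, and only copy the fork contents into
 * an iovec if the relevant flag is still set and there is something to log;
 * otherwise clear that flag as well.
 */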
STATIC void
xfs_inode_item_format_data_fork(
	struct xfs_inode_log_item *iip,
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{
	struct xfs_inode	*ip = iip->ili_inode;
	size_t			data_bytes;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEV);

		if ((iip->ili_fields & XFS_ILOG_DEXT) &&
		    ip->i_d.di_nextents > 0 &&
		    ip->i_df.if_bytes > 0) {
			struct xfs_bmbt_rec *p;

			ASSERT(xfs_iext_count(&ip->i_df) > 0);

			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT);
			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK);
			xlog_finish_iovec(lv, *vecp, data_bytes);

			ASSERT(data_bytes <= ip->i_df.if_bytes);

			ilf->ilf_dsize = data_bytes;
			ilf->ilf_size++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_DEXT;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);

		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
		    ip->i_df.if_broot_bytes > 0) {
			ASSERT(ip->i_df.if_broot != NULL);
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT,
					ip->i_df.if_broot,
					ip->i_df.if_broot_bytes);
			ilf->ilf_dsize = ip->i_df.if_broot_bytes;
			ilf->ilf_size++;
		} else {
			ASSERT(!(iip->ili_fields &
				 XFS_ILOG_DBROOT));
			iip->ili_fields &= ~XFS_ILOG_DBROOT;
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
		iip->ili_fields &=
			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
		if ((iip->ili_fields & XFS_ILOG_DDATA) &&
		    ip->i_df.if_bytes > 0) {
			/*
			 * Round i_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_df.if_bytes, 4);
			ASSERT(ip->i_df.if_real_bytes == 0 ||
			       ip->i_df.if_real_bytes >= data_bytes);
			ASSERT(ip->i_df.if_u1.if_data != NULL);
			ASSERT(ip->i_d.di_size > 0);
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
					ip->i_df.if_u1.if_data, data_bytes);
			ilf->ilf_dsize = (unsigned)data_bytes;
			ilf->ilf_size++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_DDATA;
		}
		break;
	case XFS_DINODE_FMT_DEV:
		iip->ili_fields &=
			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT | XFS_ILOG_DEXT);
		if (iip->ili_fields & XFS_ILOG_DEV)
			ilf->ilf_u.ilfu_rdev = sysv_encode_dev(VFS_I(ip)->i_rdev);
		break;
	default:
		ASSERT(0);
		break;
	}
}
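
/*
 * Format the attribute fork into the log vector.  This mirrors the data fork
 * formatting above, but operates on the attribute logging flags, fork data
 * and iovec types.
 */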
STATIC void
xfs_inode_item_format_attr_fork(
	struct xfs_inode_log_item *iip,
	struct xfs_inode_log_format *ilf,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{
	struct xfs_inode	*ip = iip->ili_inode;
	size_t			data_bytes;

	switch (ip->i_d.di_aformat) {
	case XFS_DINODE_FMT_EXTENTS:
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT);

		if ((iip->ili_fields & XFS_ILOG_AEXT) &&
		    ip->i_d.di_anextents > 0 &&
		    ip->i_afp->if_bytes > 0) {
			struct xfs_bmbt_rec *p;

			ASSERT(xfs_iext_count(ip->i_afp) ==
				ip->i_d.di_anextents);

			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT);
			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK);
			xlog_finish_iovec(lv, *vecp, data_bytes);

			ilf->ilf_asize = data_bytes;
			ilf->ilf_size++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_AEXT;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT);

		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&
		    ip->i_afp->if_broot_bytes > 0) {
			ASSERT(ip->i_afp->if_broot != NULL);

			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT,
					ip->i_afp->if_broot,
					ip->i_afp->if_broot_bytes);
			ilf->ilf_asize = ip->i_afp->if_broot_bytes;
			ilf->ilf_size++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_ABROOT;
		}
		break;
	case XFS_DINODE_FMT_LOCAL:
		iip->ili_fields &=
			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);

		if ((iip->ili_fields & XFS_ILOG_ADATA) &&
		    ip->i_afp->if_bytes > 0) {
			/*
			 * Round i_bytes up to a word boundary.
			 * The underlying memory is guaranteed
			 * to be there by xfs_idata_realloc().
			 */
			data_bytes = roundup(ip->i_afp->if_bytes, 4);
			ASSERT(ip->i_afp->if_real_bytes == 0 ||
			       ip->i_afp->if_real_bytes >= data_bytes);
			ASSERT(ip->i_afp->if_u1.if_data != NULL);
			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
					ip->i_afp->if_u1.if_data,
					data_bytes);
			ilf->ilf_asize = (unsigned)data_bytes;
			ilf->ilf_size++;
		} else {
			iip->ili_fields &= ~XFS_ILOG_ADATA;
		}
		break;
	default:
		ASSERT(0);
		break;
	}
}
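
/*
 * Copy the in-core inode into the log dinode that will be written to the log.
 * Timestamps, link count, generation and mode live in the VFS inode, so those
 * fields are taken from there rather than from the XFS incore inode.
 */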
static void
xfs_inode_to_log_dinode(
	struct xfs_inode	*ip,
	struct xfs_log_dinode	*to,
	xfs_lsn_t		lsn)
{
	struct xfs_icdinode	*from = &ip->i_d;
	struct inode		*inode = VFS_I(ip);

	to->di_magic = XFS_DINODE_MAGIC;

	to->di_version = from->di_version;
	to->di_format = from->di_format;
	to->di_uid = from->di_uid;
	to->di_gid = from->di_gid;
	to->di_projid_lo = from->di_projid_lo;
	to->di_projid_hi = from->di_projid_hi;

	memset(to->di_pad, 0, sizeof(to->di_pad));
	memset(to->di_pad3, 0, sizeof(to->di_pad3));
	to->di_atime.t_sec = inode->i_atime.tv_sec;
	to->di_atime.t_nsec = inode->i_atime.tv_nsec;
	to->di_mtime.t_sec = inode->i_mtime.tv_sec;
	to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
	to->di_ctime.t_sec = inode->i_ctime.tv_sec;
	to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
	to->di_nlink = inode->i_nlink;
	to->di_gen = inode->i_generation;
	to->di_mode = inode->i_mode;

	to->di_size = from->di_size;
	to->di_nblocks = from->di_nblocks;
	to->di_extsize = from->di_extsize;
	to->di_nextents = from->di_nextents;
	to->di_anextents = from->di_anextents;
	to->di_forkoff = from->di_forkoff;
	to->di_aformat = from->di_aformat;
	to->di_dmevmask = from->di_dmevmask;
	to->di_dmstate = from->di_dmstate;
	to->di_flags = from->di_flags;

	/* log a dummy value to ensure log structure is fully initialised */
	to->di_next_unlinked = NULLAGINO;

	if (from->di_version == 3) {
		to->di_changecount = inode_peek_iversion(inode);
		to->di_crtime.t_sec = from->di_crtime.t_sec;
		to->di_crtime.t_nsec = from->di_crtime.t_nsec;
		to->di_flags2 = from->di_flags2;
		to->di_cowextsize = from->di_cowextsize;
		to->di_ino = ip->i_ino;
		to->di_lsn = lsn;
		memset(to->di_pad2, 0, sizeof(to->di_pad2));
		uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
		to->di_flushiter = 0;
	} else {
		to->di_flushiter = from->di_flushiter;
	}
}
/*
 * Format the inode core. Current timestamp data is only in the VFS inode
 * fields, so we need to grab them from there. Hence rather than just copying
 * the XFS inode core structure, format the fields directly into the iovec.
 */
static void
xfs_inode_item_format_core(
	struct xfs_inode	*ip,
	struct xfs_log_vec	*lv,
	struct xfs_log_iovec	**vecp)
{
	struct xfs_log_dinode	*dic;

	dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
	xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
	xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
}
/*
 * This is called to fill in the vector of log iovecs for the given inode
 * log item. It fills the first item with an inode log format structure,
 * the second with the on-disk inode structure, and a possible third and/or
 * fourth with the inode data/extents/b-tree root and inode attributes
 * data/extents/b-tree root.
 *
 * Note: Always use the 64 bit inode log format structure so we don't
 * leave an uninitialised hole in the format item on 64 bit systems. Log
 * recovery on 32 bit systems handles this just fine, so there's no reason
 * for not using and initialising the properly padded structure all the time.
 */
STATIC void
xfs_inode_item_format(
	struct xfs_log_item	*lip,
	struct xfs_log_vec	*lv)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_log_iovec	*vecp = NULL;
	struct xfs_inode_log_format *ilf;

	ASSERT(ip->i_d.di_version > 1);

	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT);
	ilf->ilf_type = XFS_LI_INODE;
	ilf->ilf_ino = ip->i_ino;
	ilf->ilf_blkno = ip->i_imap.im_blkno;
	ilf->ilf_len = ip->i_imap.im_len;
	ilf->ilf_boffset = ip->i_imap.im_boffset;
	ilf->ilf_fields = XFS_ILOG_CORE;
	ilf->ilf_size = 2; /* format + core */

	/*
	 * make sure we don't leak uninitialised data into the log in the case
	 * when we don't log every field in the inode.
	 */
	ilf->ilf_dsize = 0;
	ilf->ilf_asize = 0;
	ilf->ilf_pad = 0;
	memset(&ilf->ilf_u, 0, sizeof(ilf->ilf_u));

	xlog_finish_iovec(lv, vecp, sizeof(*ilf));

	xfs_inode_item_format_core(ip, lv, &vecp);
	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
	if (XFS_IFORK_Q(ip)) {
		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
	} else {
		iip->ili_fields &=
			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT);
	}

	/* update the format with the exact fields we actually logged */
	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
}
/*
 * This is called to pin the inode associated with the inode log
 * item in memory so it cannot be written out.
 */
STATIC void
xfs_inode_item_pin(
	struct xfs_log_item	*lip)
{
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	trace_xfs_inode_pin(ip, _RET_IP_);
	atomic_inc(&ip->i_pincount);
}
/*
 * This is called to unpin the inode associated with the inode log
 * item which was previously pinned with a call to xfs_inode_item_pin().
 *
 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
 */
STATIC void
xfs_inode_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_inode	*ip = INODE_ITEM(lip)->ili_inode;

	trace_xfs_inode_unpin(ip, _RET_IP_);
	ASSERT(atomic_read(&ip->i_pincount) > 0);
	if (atomic_dec_and_test(&ip->i_pincount))
		wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}
/*
 * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer
 * have been failed during writeback.
 *
 * This informs the AIL that the inode is already flush locked on the next push,
 * and acquires a hold on the buffer to ensure that it isn't reclaimed before
 * dirty data makes it to disk.
 */
STATIC void
xfs_inode_item_error(
	struct xfs_log_item	*lip,
	struct xfs_buf		*bp)
{
	ASSERT(xfs_isiflocked(INODE_ITEM(lip)->ili_inode));
	xfs_set_li_failed(lip, bp);
}
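
/*
 * AIL push handler for inode log items.  Return the appropriate XFS_ITEM_*
 * state: pinned or stale inodes need a log force, an inode that is already
 * being flushed is left to complete, and otherwise the inode is flushed to
 * its backing buffer and that buffer is queued for delayed write.
 */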
STATIC uint
xfs_inode_item_push(
	struct xfs_log_item	*lip,
	struct list_head	*buffer_list)
		__releases(&lip->li_ailp->ail_lock)
		__acquires(&lip->li_ailp->ail_lock)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	struct xfs_buf		*bp = lip->li_buf;
	uint			rval = XFS_ITEM_SUCCESS;
	int			error;

	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	/*
	 * The buffer containing this item failed to be written back
	 * previously. Resubmit the buffer for IO.
	 */
	if (test_bit(XFS_LI_FAILED, &lip->li_flags)) {
		if (!xfs_buf_trylock(bp))
			return XFS_ITEM_LOCKED;

		if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;

		xfs_buf_unlock(bp);
		return rval;
	}

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	/*
	 * Re-check the pincount now that we stabilized the value by
	 * taking the ilock.
	 */
	if (xfs_ipincount(ip) > 0) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Stale inode items should force out the iclog.
	 */
	if (ip->i_flags & XFS_ISTALE) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Someone else is already flushing the inode. Nothing we can do
	 * here but wait for the flush to finish and remove the item from
	 * the AIL.
	 */
	if (!xfs_iflock_nowait(ip)) {
		rval = XFS_ITEM_FLUSHING;
		goto out_unlock;
	}

	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));

	spin_unlock(&lip->li_ailp->ail_lock);

	error = xfs_iflush(ip, &bp);
	if (!error) {
		if (!xfs_buf_delwri_queue(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;
		xfs_buf_relse(bp);
	}

	spin_lock(&lip->li_ailp->ail_lock);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return rval;
}
/*
 * Unlock the inode associated with the inode log item.
 */
STATIC void
xfs_inode_item_unlock(
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;
	unsigned short		lock_flags;

	ASSERT(ip->i_itemp != NULL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	lock_flags = iip->ili_lock_flags;
	iip->ili_lock_flags = 0;
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
}
/*
 * This is called to find out where the oldest active copy of the inode log
 * item in the on disk log resides now that the last log write of it completed
 * at the given lsn. Since we always re-log all dirty data in an inode, the
 * latest copy in the on disk log is the only one that matters. Therefore,
 * simply return the given lsn.
 *
 * If the inode has been marked stale because the cluster is being freed, we
 * don't want to (re-)insert this inode into the AIL. There is a race condition
 * where the cluster buffer may be unpinned before the inode is inserted into
 * the AIL during transaction committed processing. If the buffer is unpinned
 * before the inode item has been committed and inserted, then it is possible
 * for the buffer to be written and IO completes before the inode is inserted
 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
 * AIL which will never get removed. It will, however, get reclaimed which
 * triggers an assert in xfs_inode_free() complaining about freeing an inode
 * still in the AIL.
 *
 * To avoid this, just unpin the inode directly and return an LSN of -1 so the
 * transaction committed code knows that it does not need to do any further
 * processing on the item.
 */
STATIC xfs_lsn_t
xfs_inode_item_committed(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode	*ip = iip->ili_inode;

	if (xfs_iflags_test(ip, XFS_ISTALE)) {
		xfs_inode_item_unpin(lip, 0);
		return -1;
	}
	return lsn;
}
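
/*
 * Record the LSN of the last transaction that logged this inode, so a later
 * fsync of the inode knows how far the log needs to be forced.
 */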
STATIC void
xfs_inode_item_committing(
	struct xfs_log_item	*lip,
	xfs_lsn_t		lsn)
{
	INODE_ITEM(lip)->ili_last_lsn = lsn;
}
/*
 * This is the ops vector shared by all inode log items.
 */
static const struct xfs_item_ops xfs_inode_item_ops = {
	.iop_size	= xfs_inode_item_size,
	.iop_format	= xfs_inode_item_format,
	.iop_pin	= xfs_inode_item_pin,
	.iop_unpin	= xfs_inode_item_unpin,
	.iop_unlock	= xfs_inode_item_unlock,
	.iop_committed	= xfs_inode_item_committed,
	.iop_push	= xfs_inode_item_push,
	.iop_committing	= xfs_inode_item_committing,
	.iop_error	= xfs_inode_item_error
};
/*
 * Initialize the inode log item for a newly allocated (in-core) inode.
 */
void
xfs_inode_item_init(
	struct xfs_inode	*ip,
	struct xfs_mount	*mp)
{
	struct xfs_inode_log_item *iip;

	ASSERT(ip->i_itemp == NULL);
	iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);

	iip->ili_inode = ip;
	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
						&xfs_inode_item_ops);
}
/*
 * Free the inode log item and any memory hanging off of it.
 */
void
xfs_inode_item_destroy(
	xfs_inode_t	*ip)
{
	kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
	kmem_zone_free(xfs_ili_zone, ip->i_itemp);
}
/*
 * This is the inode flushing I/O completion routine. It is called
 * from interrupt level when the buffer containing the inode is
 * flushed to disk. It is responsible for removing the inode item
 * from the AIL if it has not been re-logged, and unlocking the inode's
 * flush lock.
 *
 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
 * list for other inodes that will run this function. We remove them from the
 * buffer list so we can process all the inode IO completions in one AIL lock
 * traversal.
 */
void
xfs_iflush_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	struct xfs_inode_log_item *iip;
	struct xfs_log_item	*blip, *n;
	struct xfs_ail		*ailp = lip->li_ailp;
	int			need_ail = 0;
	LIST_HEAD(tmp);

	/*
	 * Scan the buffer IO completions for other inodes being completed and
	 * attach them to the current inode log item.
	 */
	list_add_tail(&lip->li_bio_list, &tmp);

	list_for_each_entry_safe(blip, n, &bp->b_li_list, li_bio_list) {
		if (lip->li_cb != xfs_iflush_done)
			continue;

		list_move_tail(&blip->li_bio_list, &tmp);
		/*
		 * while we have the item, do the unlocked check for needing
		 * the AIL lock.
		 */
		iip = INODE_ITEM(blip);
		if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) ||
		    test_bit(XFS_LI_FAILED, &blip->li_flags))
			need_ail++;
	}

	/* make sure we capture the state of the initial inode. */
	iip = INODE_ITEM(lip);
	if ((iip->ili_logged && lip->li_lsn == iip->ili_flush_lsn) ||
	    test_bit(XFS_LI_FAILED, &lip->li_flags))
		need_ail++;

	/*
	 * We only want to pull the item from the AIL if it is
	 * actually there and its location in the log has not
	 * changed since we started the flush. Thus, we only bother
	 * if the ili_logged flag is set and the inode's lsn has not
	 * changed. First we check the lsn outside
	 * the lock since it's cheaper, and then we recheck while
	 * holding the lock before removing the inode from the AIL.
	 */
	if (need_ail) {
		bool		mlip_changed = false;

		/* this is an opencoded batch version of xfs_trans_ail_delete */
		spin_lock(&ailp->ail_lock);
		list_for_each_entry(blip, &tmp, li_bio_list) {
			if (INODE_ITEM(blip)->ili_logged &&
			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
				mlip_changed |= xfs_ail_delete_one(ailp, blip);
			else {
				xfs_clear_li_failed(blip);
			}
		}

		if (mlip_changed) {
			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
				xlog_assign_tail_lsn_locked(ailp->ail_mount);
			if (list_empty(&ailp->ail_head))
				wake_up_all(&ailp->ail_empty);
		}
		spin_unlock(&ailp->ail_lock);

		if (mlip_changed)
			xfs_log_space_wake(ailp->ail_mount);
	}

	/*
	 * clean up and unlock the flush lock now we are done. We can clear the
	 * ili_last_fields bits now that we know that the data corresponding to
	 * them is safely on disk.
	 */
	list_for_each_entry_safe(blip, n, &tmp, li_bio_list) {
		list_del_init(&blip->li_bio_list);
		iip = INODE_ITEM(blip);

		iip->ili_logged = 0;
		iip->ili_last_fields = 0;
		xfs_ifunlock(iip->ili_inode);
	}
	list_del(&tmp);
}
/*
 * This is the inode flushing abort routine. It is called from xfs_iflush when
 * the filesystem is shutting down to clean up the inode state. It is
 * responsible for removing the inode item from the AIL if it has not been
 * re-logged, and unlocking the inode's flush lock.
 */
void
xfs_iflush_abort(
	xfs_inode_t		*ip,
	bool			stale)
{
	xfs_inode_log_item_t	*iip = ip->i_itemp;

	if (iip) {
		if (test_bit(XFS_LI_IN_AIL, &iip->ili_item.li_flags)) {
			xfs_trans_ail_remove(&iip->ili_item,
					     stale ? SHUTDOWN_LOG_IO_ERROR :
						     SHUTDOWN_CORRUPT_INCORE);
		}
		iip->ili_logged = 0;
		/*
		 * Clear the ili_last_fields bits now that we know that the
		 * data corresponding to them is safely on disk.
		 */
		iip->ili_last_fields = 0;
		/*
		 * Clear the inode logging fields so no more flushes are
		 * attempted.
		 */
		iip->ili_fields = 0;
		iip->ili_fsync_fields = 0;
	}
	/*
	 * Release the inode's flush lock since we're done with it.
	 */
	xfs_ifunlock(ip);
}
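
/*
 * Completion callback for stale inode items attached to an inode cluster
 * buffer that is being invalidated because the cluster has been freed.
 * Abort the flush of the attached inode so its log item is removed from the
 * AIL and its flush lock is released.
 */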
void
xfs_istale_done(
	struct xfs_buf		*bp,
	struct xfs_log_item	*lip)
{
	xfs_iflush_abort(INODE_ITEM(lip)->ili_inode, true);
}
/*
 * convert an xfs_inode_log_format struct from the old 32 bit version
 * (which can have different field alignments) to the native 64 bit version
 */
int
xfs_inode_item_format_convert(
	struct xfs_log_iovec		*buf,
	struct xfs_inode_log_format	*in_f)
{
	struct xfs_inode_log_format_32	*in_f32 = buf->i_addr;

	if (buf->i_len != sizeof(*in_f32))
		return -EFSCORRUPTED;

	in_f->ilf_type = in_f32->ilf_type;
	in_f->ilf_size = in_f32->ilf_size;
	in_f->ilf_fields = in_f32->ilf_fields;
	in_f->ilf_asize = in_f32->ilf_asize;
	in_f->ilf_dsize = in_f32->ilf_dsize;
	in_f->ilf_ino = in_f32->ilf_ino;
	memcpy(&in_f->ilf_u, &in_f32->ilf_u, sizeof(in_f->ilf_u));
	in_f->ilf_blkno = in_f32->ilf_blkno;
	in_f->ilf_len = in_f32->ilf_len;
	in_f->ilf_boffset = in_f32->ilf_boffset;
	return 0;
}
  757. }